# 04. Pipeline Completo - OccultaShield

Este notebook integra todos los módulos optimizados para crear un pipeline completo de anonimización GDPR:

**Arquitectura Híbrida (Kornia + YOLO):**
1. **Detección de Caras**: Kornia FaceDetector (YuNet) - nativo GPU
2. **Detección de Personas**: YOLOv10 (nano/s/m según VRAM)
3. **Detección de Matrículas**: YOLO-LPR
4. **Edición**: KorniaEffects para efectos acelerados por GPU
5. **Verificación**: GemmaClient con clasificación LLM para huellas/documentos

**Flujo:**
```
Video → Detección Híbrida → Verificación GDPR → Anonimización GPU → Video Procesado
        (Kornia+YOLO)        (LLM + Graph)       (Kornia filters)
```

### 1. Imports
Importamos los componentes de los otros módulos (asumiendo que los archivos .py existen y son estables). Si usásemos solo los notebooks, copiaríamos el código aquí, pero importar mantiene este notebook limpio.

In [None]:
import sys
import os
import asyncio
import time
import logging
import nest_asyncio
import json
from pathlib import Path
from typing import Optional, List, Dict, Any, Tuple
from dataclasses import dataclass, field, asdict
from datetime import datetime

import cv2
import numpy as np
import torch
import torch.nn.functional as F

# Kornia para efectos GPU y detección de caras (YuNet)
try:
    import kornia
    import kornia.filters
    from kornia.contrib import FaceDetector, FaceDetectorResult
    KORNIA_AVAILABLE = True
    KORNIA_FACE_AVAILABLE = True
except ImportError:
    try:
        import kornia
        import kornia.filters
        KORNIA_AVAILABLE = True
        KORNIA_FACE_AVAILABLE = False
    except ImportError:
        KORNIA_AVAILABLE = False
        KORNIA_FACE_AVAILABLE = False

from ultralytics import YOLO
from scipy.optimize import linear_sum_assignment
from concurrent.futures import ThreadPoolExecutor

# Módulo de verificación GDPR (Neo4j + Gemma 3n)
try:
    sys.path.insert(0, str(Path("..").resolve()))
    from modules.verification import verify_image_detections
    VERIFICATION_MODULE_AVAILABLE = True
except ImportError as e:
    VERIFICATION_MODULE_AVAILABLE = False
    print(f"[!] Módulo de verificación no disponible: {e}")
    print("    Se usará verificación con GDPR_CONFIG hardcodeado")

# Patch asyncio so coroutines can be run from inside the notebook's own loop.
nest_asyncio.apply()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('full_pipeline')

# Environment banner: library versions and which optional components loaded.
print("=" * 70)
print("OCCULTASHIELD - Pipeline de Anonimización GDPR (Arquitectura Híbrida)")
print("=" * 70)
print(f"✓ PyTorch: {torch.__version__}")
print(f"✓ CUDA disponible: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"✓ GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; convert to whole MiB for display.
    vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024**2)
    print(f"✓ VRAM: {vram_mb}MB")
print(f"✓ Kornia disponible: {KORNIA_AVAILABLE}")
print(f"✓ Kornia FaceDetector (YuNet): {KORNIA_FACE_AVAILABLE}")
print(f"✓ Módulo de Verificación (Neo4j + Gemma 3n): {VERIFICATION_MODULE_AVAILABLE}")

### 2. Mocks de Infraestructura
Como corremos en local sin base de datos real (SurrealDB) ni backend web:
- **`MockProgressManager`**: Imprime por consola en vez de enviar WebSockets al frontend.
- **`MockDB`**: Simula guardar datos devolviendo un ID falso, sin conectar a nada.
- **`ProcessingPhase`**: Constantes para saber en qué etapa estamos.

In [2]:
# =============================================================================
# CONFIGURACIÓN GDPR Y MOCKS
# =============================================================================

# GDPR severity/action mapping, keyed by detection type.
# Each entry holds:
#   "severity": "high" or "medium"
#   "action":   anonymisation effect name ("blur" or "pixelate")
#   "articles": GDPR article numbers, stored as strings
GDPR_CONFIG = {
    "face": {"severity": "high", "action": "blur", "articles": ["9", "6"]},
    "fingerprint": {"severity": "high", "action": "pixelate", "articles": ["9", "6"]},
    "license_plate": {"severity": "high", "action": "pixelate", "articles": ["6", "17"]},
    "id_document": {"severity": "high", "action": "blur", "articles": ["9", "6", "32"]},
    "credit_card": {"severity": "high", "action": "pixelate", "articles": ["6", "32"]},
    "signature": {"severity": "medium", "action": "blur", "articles": ["6"]},
    "person": {"severity": "medium", "action": "blur", "articles": ["6", "13"]},
}

class MockProgressManager:
    """Console stand-in for the backend's WebSocket progress manager.

    Every coroutine prints to stdout instead of notifying a frontend, so the
    pipeline can run locally without the web backend.
    """

    async def register_video(self, video_id):
        """Announce that processing of ``video_id`` has started."""
        print(f"\n[PIPELINE] Iniciado: {video_id}")

    async def change_phase(self, video_id, phase, msg, **kwargs):
        """Print the new phase and its message; extra keyword args are ignored."""
        print(f"[FASE] {phase}: {msg}")

    async def update_progress(self, video_id, pct, cur, tot, msg):
        """Print a throttled progress line.

        A line is printed only when ``cur`` is a multiple of 30 or ``pct`` has
        reached 100, to keep console output readable.

        Args:
            video_id: Identifier of the video (unused by the mock).
            pct: Completion percentage; may be an int or a float.
            cur: Index of the current item (e.g. frame number).
            tot: Total number of items (unused by the mock).
            msg: Free-form status text.
        """
        if cur % 30 == 0 or pct >= 100:
            # int() keeps the ``:3d`` format valid when callers pass a float
            # percentage such as ``100 * cur / tot``.
            print(f"  └─ Progreso: {int(pct):3d}% | {msg}")

    async def complete(self, video_id, **kwargs):
        """Print the completion banner followed by one line per keyword arg."""
        print("\n[✓] COMPLETADO")
        for k, v in kwargs.items():
            print(f"    {k}: {v}")

    async def error(self, video_id, code, msg, **kwargs):
        """Print the error message; ``code`` and extra kwargs are ignored."""
        print(f"\n[✗] ERROR: {msg}")

class MockDB:
    """In-memory placeholder for the database client; nothing is persisted."""

    async def create(self, table, data):
        """Return a one-element list echoing ``data`` with a fake record id.

        The id is ``"<table>:mock_<id(data)>"``. Keys in ``data`` take
        precedence, so a caller-supplied ``"id"`` replaces the fake one.
        """
        record = {"id": f"{table}:mock_{id(data)}"}
        record.update(data)
        return [record]

class ProcessingPhase:
    """String constants naming the pipeline stages reported to the progress manager."""
    DETECTING = "detecting"
    TRACKING = "tracking"
    VERIFYING = "verifying"
    EDITING = "editing"
    COMPLETED = "completed"

# =============================================================================
# MODELOS DE DATOS (copiados de 01_detection_module para independencia)
# =============================================================================

@dataclass
class BoundingBox:
    """Axis-aligned detection box with its confidence score and frame index."""

    x1: float
    y1: float
    x2: float
    y2: float
    confidence: float
    frame: int

    @property
    def area(self) -> float:
        """Width times height of the box (no clamping is applied)."""
        width = self.x2 - self.x1
        height = self.y2 - self.y1
        return width * height

    def to_dict(self) -> dict:
        """Return the fields as a plain dict."""
        as_mapping = asdict(self)
        return as_mapping

@dataclass
class Capture:
    """Snapshot saved for a detection: where the image lives and why it was taken."""

    frame: int          # frame index the snapshot belongs to
    image_path: str     # path of the stored image
    bbox: BoundingBox   # box of the detection in that frame
    reason: str         # free-form reason the capture was made
    timestamp: float

    def to_dict(self) -> dict:
        """Return the capture as a plain dict; the nested bbox is converted too."""
        serialised = asdict(self)
        return serialised

@dataclass
class TrackedDetection:
    """One tracked object: its id, type, per-frame boxes and saved captures."""

    track_id: int
    detection_type: str
    bbox_history: List[BoundingBox] = field(default_factory=list)
    captures: List[Capture] = field(default_factory=list)
    is_confirmed: bool = False

    @property
    def best_capture(self) -> Optional[Capture]:
        """Capture whose bbox has the highest confidence, or None if there are none."""
        if self.captures:
            return max(self.captures, key=lambda cap: cap.bbox.confidence)
        return None

    def add_bbox(self, bbox: BoundingBox):
        """Append ``bbox`` to the track's history."""
        self.bbox_history.append(bbox)

    def to_dict(self) -> dict:
        """Return a plain-dict view, including ``total_frames`` (history length)."""
        history = [box.to_dict() for box in self.bbox_history]
        snapshots = [cap.to_dict() for cap in self.captures]
        return {
            "track_id": self.track_id,
            "detection_type": self.detection_type,
            "bbox_history": history,
            "captures": snapshots,
            "is_confirmed": self.is_confirmed,
            "total_frames": len(history)
        }

@dataclass
class DetectionResult:
    """Video metadata plus the tracked detections produced for that video."""
    video_path: str
    total_frames: int
    fps: float
    duration_seconds: float
    width: int
    height: int
    # One entry per tracked object.
    detections: List[TrackedDetection] = field(default_factory=list)
    frames_processed: int = 0
    processing_time_seconds: float = 0.0

### 3. Orquestador (`VideoProcessor`) - Arquitectura Híbrida

Esta clase une todo el flujo usando **HybridDetectorManager** (Kornia + YOLO).

**Componentes:**
- **HybridDetectorManager**: Kornia FaceDetector (YuNet) + YOLOv10 para personas + YOLO-LPR para matrículas
- **KorniaEffects**: Blur/Pixelate GPU acelerado
- **GDPRConfig**: Acciones por tipo de detección

**Flujo `process_full_pipeline`:**
1. **Detectar**: Usa `HybridDetectorManager.detect_all()`:
   - Caras → Kornia FaceDetector (YuNet)
   - Personas → YOLOv10 (nano/s/m según VRAM)
   - Matrículas → YOLO-LPR
2. **Tracking**: Asigna IDs estables con `ObjectTracker`
3. **Verificar GDPR**: Clasifica violaciones según severidad
4. **Anonimizar**: Aplica efectos GPU con `KorniaEffects` (blur/pixelate)
5. **Completar**: Reporta éxito con métricas

In [None]:
# =============================================================================
# GPU MANAGER
# =============================================================================
class GPUManager:
    """Process-wide singleton describing the compute device and its VRAM."""

    _instance = None

    def __new__(cls):
        # Reuse the existing instance so every caller shares the same state.
        existing = cls._instance
        if existing is not None:
            return existing
        created = super().__new__(cls)
        created._initialized = False
        cls._instance = created
        return created

    def __init__(self):
        # __init__ runs on every GPUManager() call; only probe the hardware once.
        if self._initialized:
            return
        self._initialized = True

        if not torch.cuda.is_available():
            self.device = "cpu"
            self.device_name = "CPU"
            self.vram_total_mb = 0
            return

        self.device = "cuda"
        self.device_name = torch.cuda.get_device_name(0)
        # total_memory is in bytes; keep whole MiB.
        self.vram_total_mb = torch.cuda.get_device_properties(0).total_memory // (1024**2)

    def get_strategy(self) -> Tuple[str, str, int]:
        """Return ``(strategy, model_size, batch_size)`` chosen from total VRAM.

        Below 8 GiB: sequential / nano / 4. Below 16 GiB: parallel / small / 16.
        Otherwise: parallel / medium / ``min(64, 2 * GiB)``.
        """
        vram_gb = self.vram_total_mb / 1024
        if vram_gb >= 16:
            return "parallel", "medium", min(64, int(vram_gb * 2))
        if vram_gb >= 8:
            return "parallel", "small", 16
        return "sequential", "nano", 4

gpu_manager = GPUManager()

# =============================================================================
# HYBRID DETECTOR MANAGER - Kornia FaceDetector + YOLOv10
# =============================================================================
class HybridDetectorManager:
    """
    Hybrid detector manager: Kornia (faces) + YOLO (persons, licence plates).

    Architecture:
    - Faces: Kornia FaceDetector (YuNet); OpenCV Haar cascade as fallback
    - Persons: YOLO model chosen from the GPU strategy (nano/small/medium)
    - Licence plates: optional YOLO model whose path is given by the caller
    """

    # NOTE(review): only the "person" entries are read by this class; the
    # "plate" entries are currently unused (plates need an explicit model path).
    YOLO_CONFIGS = {
        "nano": {"person": "yolov10n.pt", "plate": "yolov8n.pt"},
        "small": {"person": "yolov10s.pt", "plate": "yolov8s.pt"},
        "medium": {"person": "yolov10m.pt", "plate": "yolov8m.pt"},
    }

    # Boxes smaller than these areas (in square pixels) are discarded.
    MIN_DETECTION_AREA = 500
    MIN_PLATE_AREA = 100

    def __init__(
        self,
        gpu_mgr: GPUManager = None,
        person_model: str = None,
        plate_model: str = None,
        face_confidence: float = 0.5,
        person_confidence: float = 0.5,
        plate_confidence: float = None
    ):
        """Load every available detector.

        Args:
            gpu_mgr: GPU manager to use; defaults to the module-level one.
            person_model: Path/name of the person model; defaults to the entry
                of ``YOLO_CONFIGS`` matching the GPU strategy.
            plate_model: Path of the plate model. Plates are only detected
                when this path is given and exists on disk.
            face_confidence: Minimum score for Kornia face detections.
            person_confidence: Minimum score for person detections.
            plate_confidence: Minimum score for plate detections. ``None``
                (default) reuses ``person_confidence``, matching the previous
                behaviour.
        """
        self.gpu = gpu_mgr or gpu_manager
        self.device = self.gpu.device
        self.strategy, self.model_size, self.batch_size = self.gpu.get_strategy()

        self.face_confidence = face_confidence
        self.person_confidence = person_confidence
        self.plate_confidence = (
            person_confidence if plate_confidence is None else plate_confidence
        )

        self._init_face_detector()
        self._init_yolo_detectors(person_model, plate_model)

        logger.info(f"HybridDetectorManager: strategy={self.strategy}, size={self.model_size}, "
                   f"device={self.device}, kornia_face={KORNIA_FACE_AVAILABLE}")

    def _init_face_detector(self):
        """Initialise the Kornia FaceDetector, falling back to an OpenCV Haar cascade."""
        self.face_detector = None
        # Always defined so callers can test for None instead of using hasattr().
        self.face_cascade = None

        if KORNIA_FACE_AVAILABLE:
            try:
                self.face_detector = FaceDetector().to(self.device)
                logger.info("✓ Kornia FaceDetector (YuNet) loaded")
            except Exception as e:
                logger.warning(f"Could not load Kornia FaceDetector: {e}")

        # Fallback: OpenCV Haar cascade
        if self.face_detector is None:
            cascade = cv2.CascadeClassifier(
                cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            )
            if cascade.empty():
                # A cascade whose XML failed to load cannot detect anything;
                # say so instead of pretending a fallback is active.
                logger.warning("OpenCV Haar Cascade could not be loaded; face detection disabled")
            else:
                self.face_cascade = cascade
                logger.info("Using OpenCV Haar Cascade fallback for face detection")

    def _init_yolo_detectors(self, person_model: str, plate_model: str):
        """Initialise the YOLO detectors for persons and (optionally) plates."""
        config = self.YOLO_CONFIGS[self.model_size]

        # Person detector
        person_path = person_model or config["person"]
        try:
            self.person_detector = YOLO(person_path)
            logger.info(f"✓ YOLO person detector loaded: {person_path}")
        except Exception as e:
            logger.error(f"Failed to load person model: {e}")
            self.person_detector = None

        # Plate detector (optional): only loaded from an existing local path
        self.plate_detector = None
        if plate_model and os.path.exists(plate_model):
            try:
                self.plate_detector = YOLO(plate_model)
                logger.info(f"✓ YOLO plate detector loaded: {plate_model}")
            except Exception as e:
                logger.warning(f"Could not load plate model: {e}")

    def _numpy_to_tensor(self, frame: np.ndarray) -> torch.Tensor:
        """Convert a BGR frame to a float RGB tensor of shape (1, C, H, W) in [0, 1]."""
        # NOTE(review): Kornia's face-detection example appears to feed float
        # images in the 0-255 range; confirm the [0, 1] scaling used here is
        # what the detector expects.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        tensor = torch.from_numpy(rgb).permute(2, 0, 1).unsqueeze(0).float() / 255.0
        return tensor.to(self.device)

    def detect_faces_kornia(self, tensor: torch.Tensor, frame_num: int) -> List[Tuple[str, BoundingBox]]:
        """
        Detect faces with the Kornia FaceDetector.

        Args:
            tensor: Image batch as produced by ``_numpy_to_tensor``.
            frame_num: Frame index stored in every returned box.

        Returns:
            ``("face", BoundingBox)`` pairs whose score reaches
            ``face_confidence`` and whose area reaches ``MIN_DETECTION_AREA``.
            Empty when no Kornia detector is loaded.
        """
        if self.face_detector is None:
            return []

        results = []

        with torch.no_grad():
            detections = self.face_detector(tensor)

        # Decode each per-image detection tensor through FaceDetectorResult
        for det in detections:
            try:
                face_result = FaceDetectorResult(det)

                # Nothing detected in this image
                if face_result.score.numel() == 0:
                    continue

                top_left = face_result.top_left.int().tolist()
                bottom_right = face_result.bottom_right.int().tolist()
                scores = face_result.score.tolist()

                for score, tl, br in zip(scores, top_left, bottom_right):
                    if score >= self.face_confidence:
                        x1, y1 = float(tl[0]), float(tl[1])
                        x2, y2 = float(br[0]), float(br[1])

                        bbox = BoundingBox(x1, y1, x2, y2, float(score), frame_num)

                        if bbox.area >= self.MIN_DETECTION_AREA:
                            results.append(("face", bbox))

            except Exception as e:
                # Best effort: keep processing, but make the failure visible,
                # since a dropped face detection means an un-anonymised face.
                logger.warning(f"Error processing face detection: {e}")

        return results

    def detect_faces_opencv(self, frame: np.ndarray, frame_num: int) -> List[Tuple[str, BoundingBox]]:
        """Fallback face detection with the OpenCV Haar cascade.

        Haar cascades give no score, so every box gets a fixed confidence of 0.8.
        Returns an empty list when no cascade is loaded.
        """
        if self.face_cascade is None:
            return []

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(gray, 1.1, 5, minSize=(30, 30))

        results = []
        for (x, y, w, h) in faces:
            bbox = BoundingBox(float(x), float(y), float(x + w), float(y + h), 0.8, frame_num)
            if bbox.area >= self.MIN_DETECTION_AREA:
                results.append(("face", bbox))
        return results

    def detect_persons(self, frame: np.ndarray, frame_num: int) -> List[Tuple[str, BoundingBox]]:
        """Detect persons with the YOLO person model (class id 0 only)."""
        if self.person_detector is None:
            return []

        results_yolo = self.person_detector.predict(
            frame, conf=self.person_confidence, verbose=False, device=self.device
        )

        results = []
        for r in results_yolo:
            for box in r.boxes:
                cls = int(box.cls[0])
                if cls == 0:  # COCO class 0 = person
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    bbox = BoundingBox(x1, y1, x2, y2, float(box.conf[0]), frame_num)
                    if bbox.area >= self.MIN_DETECTION_AREA:
                        results.append(("person", bbox))
        return results

    def detect_plates(self, frame: np.ndarray, frame_num: int) -> List[Tuple[str, BoundingBox]]:
        """Detect licence plates with the optional YOLO plate model.

        Every box the model returns is treated as a plate (no class filter).
        """
        if self.plate_detector is None:
            return []

        results_yolo = self.plate_detector.predict(
            frame, conf=self.plate_confidence, verbose=False, device=self.device
        )

        results = []
        for r in results_yolo:
            for box in r.boxes:
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                bbox = BoundingBox(x1, y1, x2, y2, float(box.conf[0]), frame_num)
                if bbox.area >= self.MIN_PLATE_AREA:
                    results.append(("license_plate", bbox))
        return results

    def detect_all(self, frame: np.ndarray, frame_num: int) -> List[Tuple[str, BoundingBox]]:
        """Run every detector on one BGR frame and concatenate the results.

        Order of the returned list: persons, faces, plates.
        """
        all_detections = []

        # Persons (YOLO)
        all_detections.extend(self.detect_persons(frame, frame_num))

        # Faces (Kornia, or the OpenCV fallback)
        if self.face_detector is not None:
            tensor = self._numpy_to_tensor(frame)
            all_detections.extend(self.detect_faces_kornia(tensor, frame_num))
        else:
            all_detections.extend(self.detect_faces_opencv(frame, frame_num))

        # Plates (YOLO)
        all_detections.extend(self.detect_plates(frame, frame_num))

        return all_detections

    def get_info(self) -> Dict:
        """Return a summary dict: strategy, sizes, device and the active detectors."""
        detectors = []
        if self.person_detector is not None:
            detectors.append("person (YOLOv10)")
        if self.face_detector is not None:
            detectors.append("face (Kornia YuNet)")
        elif self.face_cascade is not None:
            detectors.append("face (OpenCV Haar)")
        if self.plate_detector is not None:
            detectors.append("plate (YOLO)")

        return {
            "strategy": self.strategy,
            "model_size": self.model_size,
            "batch_size": self.batch_size,
            "device": self.device,
            "vram_total_mb": self.gpu.vram_total_mb,
            "detectors": detectors,
            "kornia_available": KORNIA_FACE_AVAILABLE
        }

# =============================================================================
# KORNIA EFFECTS (de 02_edition_module)
# =============================================================================
class KorniaEffects:
    """Anonymisation effects (Gaussian blur, pixelation) applied to image tensors.

    Region methods take a ``(1, C, H, W)`` float tensor and a bbox
    ``(x1, y1, x2, y2)`` in pixel coordinates. They return a modified copy and
    never mutate the input.
    """

    def __init__(self, device: str = None):
        """Use ``device`` if given, else CUDA when available, else CPU."""
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        # (track_id, blocks) -> noise tensor, so a track keeps the same noise
        # pattern from frame to frame.
        self.noise_cache = {}

    def numpy_to_tensor(self, frame: np.ndarray) -> torch.Tensor:
        """Convert a BGR uint8 frame to a float RGB tensor (1, C, H, W) in [0, 1]."""
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        tensor = torch.from_numpy(rgb).permute(2, 0, 1).unsqueeze(0)
        return tensor.float().div(255.0).to(self.device)

    def tensor_to_numpy(self, tensor: torch.Tensor) -> np.ndarray:
        """Inverse of ``numpy_to_tensor``: back to a BGR uint8 frame."""
        arr = tensor.squeeze(0).permute(1, 2, 0)
        arr = arr.mul(255.0).clamp(0, 255).byte().cpu().numpy()
        return cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)

    @staticmethod
    def _clip_bbox(tensor, bbox):
        """Return ``bbox`` as ints clamped to the tensor's H and W.

        Detector boxes may be floats, negative, or extend past the frame.
        Unclamped, a negative start wraps around in Python slicing and an
        oversized end makes the ROI smaller than the requested size.
        """
        height, width = tensor.shape[-2], tensor.shape[-1]
        x1, y1, x2, y2 = (int(v) for v in bbox)
        x1 = min(max(x1, 0), width)
        x2 = min(max(x2, 0), width)
        y1 = min(max(y1, 0), height)
        y2 = min(max(y2, 0), height)
        return x1, y1, x2, y2

    def blur_region(self, tensor, bbox, kernel_size=31, sigma=15.0):
        """Return a copy of ``tensor`` with the bbox region Gaussian-blurred.

        The copy is returned unchanged when Kornia is unavailable or when the
        clamped region is empty.

        Args:
            tensor: Image tensor of shape (1, C, H, W).
            bbox: ``(x1, y1, x2, y2)``; clamped to the image bounds.
            kernel_size: Blur kernel size; forced to be odd.
            sigma: Gaussian standard deviation used on both axes.
        """
        x1, y1, x2, y2 = self._clip_bbox(tensor, bbox)
        result = tensor.clone()
        roi = tensor[:, :, y1:y2, x1:x2]

        if KORNIA_AVAILABLE and roi.numel() > 0:
            ks = kernel_size | 1
            # Reflect padding needs the pad (ks // 2) to be smaller than the
            # ROI side; fall back to replicate padding for small regions so
            # they are still blurred instead of raising.
            pad = ks // 2
            border = "reflect" if min(roi.shape[2], roi.shape[3]) > pad else "replicate"
            blurred = kornia.filters.gaussian_blur2d(
                roi, (ks, ks), (sigma, sigma), border_type=border
            )
            result[:, :, y1:y2, x1:x2] = blurred

        return result

    def pixelate_region(self, tensor, bbox, blocks=10, track_id=0):
        """Return a copy of ``tensor`` with the bbox region pixelated.

        The region is downsampled to ``blocks x blocks``, perturbed with a
        fixed per-track noise in [-0.1, 0.1), and upsampled with
        nearest-neighbour. Regions narrower or shorter than 2 px after
        clamping are left untouched.

        Args:
            tensor: Image tensor of shape (1, 3, H, W) with values in [0, 1].
            bbox: ``(x1, y1, x2, y2)``; clamped to the image bounds.
            blocks: Number of mosaic cells per side.
            track_id: Seeds the noise so a track keeps the same pattern.
        """
        x1, y1, x2, y2 = self._clip_bbox(tensor, bbox)
        result = tensor.clone()
        roi = tensor[:, :, y1:y2, x1:x2]

        roi_h, roi_w = roi.shape[2], roi.shape[3]
        if roi_h < 2 or roi_w < 2:
            return result

        small = F.interpolate(roi, size=(blocks, blocks), mode='bilinear', align_corners=False)

        cache_key = (track_id, blocks)
        if cache_key not in self.noise_cache:
            gen = torch.Generator(device=self.device).manual_seed(track_id * 1000)
            self.noise_cache[cache_key] = torch.rand(1, 3, blocks, blocks, generator=gen, device=self.device) * 0.2 - 0.1

        small = (small + self.noise_cache[cache_key]).clamp(0, 1)
        # Upsample to the actual ROI size so the assignment below always fits.
        pixelated = F.interpolate(small, size=(roi_h, roi_w), mode='nearest')
        result[:, :, y1:y2, x1:x2] = pixelated

        return result

    def clear_cache(self):
        """Drop all cached per-track noise tensors."""
        self.noise_cache.clear()

# Shared effects instance; None when Kornia could not be imported.
kornia_effects = KorniaEffects() if KORNIA_AVAILABLE else None

# =============================================================================
# TRACKER SIMPLIFICADO
# =============================================================================
class ObjectTracker:
    """Minimal IoU-based tracker that assigns stable ids to per-frame detections.

    Each track is a dict with keys ``cls``, ``bbox``, ``age`` (frames since the
    last match) and ``hits`` (number of matched detections).
    """

    def __init__(self, iou_threshold=0.3, max_age=30, min_hits=3):
        """
        Args:
            iou_threshold: Minimum IoU for a detection to match a track.
            max_age: A track unmatched for more than this many updates is dropped.
            min_hits: Matches required before a track is reported as confirmed.
        """
        self.iou_threshold = iou_threshold
        self.max_age = max_age
        self.min_hits = min_hits
        self.tracks = {}
        self.next_id = 1

    def update(self, detections, frame_num):
        """Match this frame's detections to tracks and return the confirmed ones.

        Each detection goes to the same-class track with the highest IoU at or
        above ``iou_threshold``. A track takes at most one detection per call,
        so two overlapping objects cannot collapse into a single track.
        Unmatched detections start new tracks.

        Args:
            detections: Iterable of ``(cls, bbox)`` pairs; ``bbox`` must expose
                ``x1``, ``y1``, ``x2``, ``y2`` and ``area``.
            frame_num: Current frame index (not used by the matching itself).

        Returns:
            List of ``(track_id, cls, bbox)`` for every live track with at
            least ``min_hits`` matches. Tracks not matched in this call are
            included with their last known bbox.
        """
        for t in self.tracks.values():
            t['age'] += 1

        claimed = set()  # track ids already matched or created in this call
        for cls, bbox in detections:
            best_id = None
            best_iou = -1.0
            for tid, t in self.tracks.items():
                if tid in claimed or t['cls'] != cls:
                    continue
                overlap = self._iou(t['bbox'], bbox)
                if overlap >= self.iou_threshold and overlap > best_iou:
                    best_id, best_iou = tid, overlap

            if best_id is None:
                best_id = self.next_id
                self.tracks[best_id] = {'cls': cls, 'bbox': bbox, 'age': 0, 'hits': 1}
                self.next_id += 1
            else:
                matched = self.tracks[best_id]
                matched['bbox'] = bbox
                matched['age'] = 0
                matched['hits'] += 1
            claimed.add(best_id)

        dead = [tid for tid, t in self.tracks.items() if t['age'] > self.max_age]
        for tid in dead:
            del self.tracks[tid]

        return [
            (tid, t['cls'], t['bbox'])
            for tid, t in self.tracks.items()
            if t['hits'] >= self.min_hits
        ]

    def _iou(self, b1, b2):
        """Intersection-over-union of two boxes; 0 when they do not overlap."""
        x1 = max(b1.x1, b2.x1)
        y1 = max(b1.y1, b2.y1)
        x2 = min(b1.x2, b2.x2)
        y2 = min(b1.y2, b2.y2)
        if x2 < x1 or y2 < y1:
            return 0
        inter = (x2 - x1) * (y2 - y1)
        union = b1.area + b2.area - inter
        return inter / union if union > 0 else 0

# =============================================================================
# GENERADOR DE JSON DE SALIDA
# =============================================================================
def generate_output_json(
    video_id: str,
    detection_result: DetectionResult,
    violations: List[Dict],
    output_path: Path,
    processing_time: float
) -> Dict[str, Any]:
    """
    Build the pipeline's result document with the GDPR summary.

    Args:
        video_id: Identifier copied into the report.
        detection_result: Video metadata and tracked detections.
        violations: One dict per violation. Each must contain ``"track"``
            (a TrackedDetection) and may contain ``"severity"``, ``"action"``,
            ``"articles"`` and ``"verification"`` (a dict, or None).
        output_path: Path of the processed video, or None.
        processing_time: Total processing time in seconds.

    Returns:
        A dict of plain values: metadata, stats, per-type summary, the list of
        violations and a ``gdpr_compliance`` section.
    """
    # Local import: the file imports only ``datetime`` from the datetime module.
    from datetime import timezone

    # Count detections per type
    by_type = {}
    for track in detection_result.detections:
        entry = by_type.setdefault(track.detection_type, {"count": 0, "violations": 0})
        entry["count"] += 1

    # Count violations per type (only for types present among the detections)
    for v in violations:
        t = v["track"].detection_type
        if t in by_type:
            by_type[t]["violations"] += 1

    # Build the per-violation records
    violations_list = []
    for i, v in enumerate(violations, 1):
        track = v["track"]
        # ``or {}`` also covers an explicit ``"verification": None``.
        verification = v.get("verification") or {}

        # Frame range covered by the track
        first_frame = track.bbox_history[0].frame if track.bbox_history else 0
        last_frame = track.bbox_history[-1].frame if track.bbox_history else 0
        duration = (last_frame - first_frame) / detection_result.fps if detection_result.fps > 0 else 0

        # Best capture, if any
        best = track.best_capture
        best_capture_path = best.image_path if best else None

        # Mean confidence over the track, used when verification gives none
        avg_confidence = 0.0
        if track.bbox_history:
            avg_confidence = sum(b.confidence for b in track.bbox_history) / len(track.bbox_history)

        violations_list.append({
            "id": f"viol_{i:03d}",
            "track_id": track.track_id,
            "detection_type": track.detection_type,
            "severity": v.get("severity", "medium"),
            "confidence": verification.get("confidence", avg_confidence),
            "violated_articles": v.get("articles", []),
            "description": verification.get("description", f"Detected {track.detection_type} which constitutes personal data."),
            "reasoning": verification.get("reasoning", ""),
            "recommended_action": v.get("action", "blur"),
            "action_applied": v.get("action", "blur"),
            "capture_image": best_capture_path,
            "frame_range": {
                "first": first_frame,
                "last": last_frame
            },
            "duration_seconds": round(duration, 2),
            "total_frames": len(track.bbox_history)
        })

    # Decide whether consent is required
    has_high_severity = any(v.get("severity") == "high" for v in violations)
    # Fingerprints are biometric identifiers just like faces.
    biometric_markers = ("face", "fingerprint")
    has_biometric = any(
        marker in v["track"].detection_type
        for v in violations
        for marker in biometric_markers
    )

    def _article_order(article):
        # Numeric articles sort by value ("6" before "17"); anything else after.
        text = str(article)
        return (0, int(text), text) if text.isdecimal() else (1, 0, text)

    # Sorted so the report is reproducible (plain set order is arbitrary).
    applicable_articles = sorted(
        {article for v in violations for article in v.get("articles", [])},
        key=_article_order,
    )

    # Same "...Z" text as before, without the deprecated datetime.utcnow().
    processing_date = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    result = {
        "video_id": video_id,
        "processing_date": processing_date,
        "pipeline_version": "1.0.0",
        "architecture": "Hybrid (Kornia + YOLO)",
        "verification_method": "Neo4j + Gemma 3n" if VERIFICATION_MODULE_AVAILABLE else "Hardcoded GDPR_CONFIG",

        "video_metadata": {
            "original_path": detection_result.video_path,
            "output_path": str(output_path) if output_path else None,
            "duration_seconds": round(detection_result.duration_seconds, 2),
            "fps": detection_result.fps,
            "resolution": {
                "width": detection_result.width,
                "height": detection_result.height
            },
            "total_frames": detection_result.total_frames
        },

        "processing_stats": {
            "processing_time_seconds": round(processing_time, 2),
            "frames_processed": detection_result.frames_processed,
            "detections_total": len(detection_result.detections),
            "violations_total": len(violations)
        },

        "detections_summary": by_type,

        "violations": violations_list,

        "gdpr_compliance": {
            "requires_consent": has_high_severity or has_biometric,
            "legal_basis_required": "consent" if (has_high_severity or has_biometric) else "legitimate_interest",
            "contains_biometric_data": has_biometric,
            "data_categories": list(by_type.keys()),
            "data_retention_recommendation": "30 days",
            "anonymization_applied": True,
            "applicable_articles": applicable_articles
        }
    }

    return result


# =============================================================================
# VIDEO PROCESSOR - PIPELINE INTEGRADO (Arquitectura Híbrida)
# =============================================================================
class VideoProcessor:
    """
    Orchestrator of the complete GDPR anonymization pipeline.

    Integrates:
    - HybridDetectorManager: Kornia FaceDetector (YuNet) + YOLOv10
    - Verification module: Neo4j + Gemma 3n (when available), otherwise the
      hard-coded ``GDPR_CONFIG`` fallback
    - KorniaEffects: GPU-accelerated effects, with an OpenCV CPU fallback
    - JSON generator: GDPR results form (``generate_output_json``)
    """

    def __init__(self, person_model: Optional[str] = None, plate_model: Optional[str] = None):
        """
        Wire the pipeline to the notebook-level ``gpu_manager`` and ``kornia_effects``.

        Args:
            person_model: Optional custom person model, forwarded to HybridDetectorManager.
            plate_model: Optional custom plate model, forwarded to HybridDetectorManager.
        """
        self.progress = MockProgressManager()
        self.db = MockDB()
        self.gpu = gpu_manager

        # Hybrid detection manager
        self.detector = HybridDetectorManager(
            gpu_mgr=self.gpu,
            person_model=person_model,
            plate_model=plate_model
        )

        self.batch_size = self.detector.batch_size

        # Kornia effects
        self.effects = kornia_effects

        logger.info(f"Pipeline initialized: {self.detector.get_info()}")

    @staticmethod
    def _progress_pct(done: int, total: int) -> int:
        """Return ``done / total`` as an integer percentage; 0 when ``total`` is not positive.

        OpenCV can report a frame count of 0 (or a negative value) for some
        containers/streams, so the division must be guarded.
        """
        if total <= 0:
            return 0
        return int(done / total * 100)

    @staticmethod
    def _clamp_bbox(bbox: Tuple[int, int, int, int], width: int, height: int) -> Optional[Tuple[int, int, int, int]]:
        """Clip ``(x1, y1, x2, y2)`` to a ``width`` x ``height`` frame.

        Returns:
            The clipped box, or ``None`` when nothing of it lies inside the frame
            (zero or negative area after clipping).
        """
        x1, y1, x2, y2 = bbox
        x1 = max(0, min(int(x1), width))
        x2 = max(0, min(int(x2), width))
        y1 = max(0, min(int(y1), height))
        y2 = max(0, min(int(y2), height))
        if x2 <= x1 or y2 <= y1:
            return None
        return (x1, y1, x2, y2)

    async def _verify_with_module(self, track: "TrackedDetection", captures_dir: Path) -> Dict[str, Any]:
        """
        Verify a detection with the verification module (Neo4j + Gemma 3n).

        The capture image is taken from ``track.best_capture`` when present,
        otherwise from the first ``*.jpg`` (by name) in
        ``captures_dir/track_<track_id>``. When no image exists on disk, the
        module returns no result, or the module raises, the hard-coded fallback
        is used instead.

        Args:
            track: Tracked detection to verify.
            captures_dir: Directory holding per-track capture folders.

        Returns:
            Dict with ``is_violation``, ``severity``, ``articles``, ``action`` and
            the raw ``verification`` payload.
        """
        # Look up a capture image for this track
        image_path = None
        if track.best_capture:
            image_path = track.best_capture.image_path

        if not image_path:
            # Search the captures directory; sort so the choice is deterministic
            # (glob order is filesystem dependent).
            track_dir = captures_dir / f"track_{track.track_id}"
            if track_dir.exists():
                images = sorted(track_dir.glob("*.jpg"))
                if images:
                    image_path = str(images[0])

        if not image_path or not os.path.exists(image_path):
            # No image available: use the fallback verification
            return await self._verify_with_fallback(track)

        # Build the detection payload for the module from the first recorded bbox
        first_bbox = track.bbox_history[0] if track.bbox_history else None
        detection_data = {
            "id": track.track_id,
            "detection_type": track.detection_type,
            "confidence": first_bbox.confidence if first_bbox is not None else 0.5,
            "bbox": first_bbox.to_dict() if first_bbox is not None else {}
        }

        try:
            results = await verify_image_detections(image_path, [detection_data])
            if results:
                result = results[0]
                return {
                    "is_violation": result.get("is_violation", True),
                    "severity": result.get("severity", "medium"),
                    "articles": result.get("violated_articles", ["6"]),
                    "action": result.get("recommended_action", "blur"),
                    "verification": result
                }
        except Exception as e:
            # Deliberate best effort: any module failure degrades to the fallback.
            logger.warning(f"Error en verificación con módulo: {e}, usando fallback")

        return await self._verify_with_fallback(track)

    async def _verify_with_fallback(self, track: "TrackedDetection") -> Dict[str, Any]:
        """
        Fallback verification based on the hard-coded ``GDPR_CONFIG``.

        Every detection is reported as a violation; unknown detection types get
        severity ``low``, action ``blur`` and article ``6``.
        """
        gdpr_info = GDPR_CONFIG.get(track.detection_type,
            {"severity": "low", "action": "blur", "articles": ["6"]})

        return {
            "is_violation": True,
            "severity": gdpr_info["severity"],
            "articles": gdpr_info["articles"],
            "action": gdpr_info["action"],
            "verification": {
                "is_violation": True,
                "severity": gdpr_info["severity"],
                "violated_articles": gdpr_info["articles"],
                "description": f"Detected {track.detection_type} which constitutes personal data.",
                "recommended_action": gdpr_info["action"],
                "confidence": 0.9
            }
        }

    async def process_full_pipeline(self, video_id: str, input_path: str) -> Optional[Dict[str, Any]]:
        """
        Full pipeline: hybrid detection -> GDPR verification -> JSON -> anonymization.

        Args:
            video_id: Identifier used for progress reporting and output file names.
            input_path: Path of the video to process.

        Returns:
            The generated results JSON as a dict, or ``None`` when any phase
            raised (the error is reported through ``self.progress.error``).
        """
        start_time = time.time()

        try:
            await self.progress.register_video(video_id)

            # -----------------------------------------------------------------
            # PHASE 1: HYBRID DETECTION (Kornia + YOLO)
            # -----------------------------------------------------------------
            info = self.detector.get_info()
            await self.progress.change_phase(video_id, ProcessingPhase.DETECTING,
                f"Detección híbrida: {', '.join(info['detectors'])}")

            output_dir = Path("../storage/captures") / video_id
            output_dir.mkdir(parents=True, exist_ok=True)

            detection_result = await self._detect_video(input_path, output_dir, video_id)

            # Detection summary grouped by type
            by_type = {}
            for det in detection_result.detections:
                by_type.setdefault(det.detection_type, []).append(det)
            counts_by_type = {k: len(v) for k, v in by_type.items()}

            print(f"\n  Detecciones: {counts_by_type}")

            # -----------------------------------------------------------------
            # PHASE 2: GDPR VERIFICATION (Neo4j + Gemma 3n or fallback)
            # -----------------------------------------------------------------
            verification_method = "Neo4j + Gemma 3n" if VERIFICATION_MODULE_AVAILABLE else "GDPR_CONFIG fallback"
            await self.progress.change_phase(video_id, ProcessingPhase.VERIFYING,
                f"Verificando cumplimiento GDPR ({verification_method})...")

            violations = []
            total_tracks = len(detection_result.detections)

            for idx, track in enumerate(detection_result.detections):
                if VERIFICATION_MODULE_AVAILABLE:
                    v_result = await self._verify_with_module(track, output_dir)
                else:
                    v_result = await self._verify_with_fallback(track)

                if v_result.get("is_violation", False):
                    violations.append({
                        "track": track,
                        "severity": v_result["severity"],
                        "action": v_result["action"],
                        "articles": v_result["articles"],
                        "verification": v_result.get("verification", {})
                    })

                # Verification progress: every 10 tracks and on the last one
                if (idx + 1) % 10 == 0 or idx == total_tracks - 1:
                    pct = self._progress_pct(idx + 1, total_tracks)
                    await self.progress.update_progress(video_id, pct, idx + 1, total_tracks,
                        f"Verificando track {idx + 1}/{total_tracks}")

            print(f"  Violaciones GDPR: {len(violations)}")
            print(f"  Método de verificación: {verification_method}")

            # -----------------------------------------------------------------
            # PHASE 3: RESULTS JSON GENERATION
            # -----------------------------------------------------------------
            processing_time = time.time() - start_time
            output_path = Path(f"../storage/processed/{video_id}_anonymized.mp4")

            output_json = generate_output_json(
                video_id=video_id,
                detection_result=detection_result,
                violations=violations,
                output_path=output_path,
                processing_time=processing_time
            )

            # No anonymized video exists yet (and none will if there are no
            # violations), so the persisted JSON must not claim one until
            # phase 4 has actually finished.
            output_json["video_metadata"]["output_path"] = None
            output_json["gdpr_compliance"]["anonymization_applied"] = False

            # Save JSON
            json_path = Path(f"../storage/processed/{video_id}_results.json")
            json_path.parent.mkdir(parents=True, exist_ok=True)

            with open(json_path, 'w', encoding='utf-8') as f:
                json.dump(output_json, f, indent=2, ensure_ascii=False)

            print(f"\n  JSON guardado: {json_path}")

            # -----------------------------------------------------------------
            # PHASE 4: ANONYMIZATION (Kornia GPU)
            # -----------------------------------------------------------------
            if violations:
                await self.progress.change_phase(video_id, ProcessingPhase.EDITING,
                    f"Anonimizando {len(violations)} elementos con Kornia GPU...")

                await self._anonymize_video(input_path, str(output_path), violations, video_id)

                # Update the JSON now that the anonymized video has been written
                output_json["video_metadata"]["output_path"] = str(output_path)
                output_json["gdpr_compliance"]["anonymization_applied"] = True
                output_json["processing_stats"]["processing_time_seconds"] = round(time.time() - start_time, 2)

                # Re-save the updated JSON
                with open(json_path, 'w', encoding='utf-8') as f:
                    json.dump(output_json, f, indent=2, ensure_ascii=False)

            # -----------------------------------------------------------------
            # DONE
            # -----------------------------------------------------------------
            elapsed = time.time() - start_time

            summary = {
                "architecture": "Hybrid (Kornia + YOLO)",
                "verification": verification_method,
                "detectors": info['detectors'],
                "total_detections": len(detection_result.detections),
                "total_violations": len(violations),
                "by_type": counts_by_type,
                "processing_time": f"{elapsed:.2f}s",
                "output_video": str(output_path) if violations else None,
                "output_json": str(json_path)
            }

            await self.progress.complete(video_id, **summary)

            return output_json

        except Exception as e:
            # Top-level pipeline boundary: show the traceback, report, return None.
            import traceback
            traceback.print_exc()
            await self.progress.error(video_id, "PIPELINE_ERROR", str(e))
            return None

    async def _detect_video(self, video_path: str, output_dir: Path, video_id: str) -> "DetectionResult":
        """
        Detection phase using HybridDetectorManager and ObjectTracker.

        Args:
            video_path: Video to analyse.
            output_dir: Captures directory for this video (not used by this method).
            video_id: Identifier used for progress reporting.

        Returns:
            DetectionResult with the video metadata and one TrackedDetection per
            confirmed track.

        Raises:
            RuntimeError: If OpenCV cannot open ``video_path``; otherwise an
                unreadable file would be reported as a video with no detections.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            raise RuntimeError(f"No se pudo abrir el video: {video_path}")

        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            tracker = ObjectTracker()
            tracked = {}
            frame_num = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Frames are numbered from 1; _anonymize_video counts the same way
                frame_num += 1

                # Detect with HybridDetectorManager (Kornia + YOLO)
                detections = self.detector.detect_all(frame, frame_num)

                # Update the tracker
                confirmed = tracker.update(detections, frame_num)

                for tid, cls, bbox in confirmed:
                    if tid not in tracked:
                        tracked[tid] = TrackedDetection(tid, cls)
                    tracked[tid].add_bbox(bbox)

                # Progress
                if frame_num % 30 == 0:
                    pct = self._progress_pct(frame_num, total_frames)
                    await self.progress.update_progress(video_id, pct, frame_num, total_frames,
                        f"Frame {frame_num}/{total_frames}")
        finally:
            # Release the capture even if detection or progress reporting fails
            cap.release()

        return DetectionResult(
            video_path=video_path,
            total_frames=total_frames,
            fps=fps,
            duration_seconds=total_frames/fps if fps > 0 else 0,
            width=width,
            height=height,
            detections=list(tracked.values()),
            frames_processed=frame_num
        )

    async def _anonymize_video(self, input_path: str, output_path: str, violations: List[Dict], video_id: str):
        """
        Anonymization phase: apply each violation's action on every frame of its track.

        Uses ``self.effects`` (Kornia) when available, otherwise an OpenCV CPU
        fallback. Only the ``blur`` and ``pixelate`` actions are applied; any
        other action leaves the region untouched.

        Args:
            input_path: Original video.
            output_path: Destination of the anonymized video (mp4v codec).
            violations: Violation dicts carrying ``track`` and ``action``.
            video_id: Identifier used for progress reporting.

        Raises:
            RuntimeError: If the input video or the output writer cannot be opened.
        """
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            cap.release()
            raise RuntimeError(f"No se pudo abrir el video: {input_path}")

        out = None
        try:
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            if not out.isOpened():
                # A writer that failed to open drops every frame silently
                raise RuntimeError(f"No se pudo crear el video de salida: {output_path}")

            # Build the frame -> actions map
            actions_by_frame = {}
            for v in violations:
                track = v["track"]
                for bbox in track.bbox_history:
                    actions_by_frame.setdefault(bbox.frame, []).append({
                        "bbox": (int(bbox.x1), int(bbox.y1), int(bbox.x2), int(bbox.y2)),
                        "action": v["action"],
                        "track_id": track.track_id
                    })

            frame_num = 0
            use_gpu = self.effects is not None and KORNIA_AVAILABLE

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_num += 1

                frame_actions = actions_by_frame.get(frame_num)
                if frame_actions:
                    if use_gpu:
                        tensor = self.effects.numpy_to_tensor(frame)

                        for action in frame_actions:
                            bbox = action["bbox"]
                            if action["action"] == "blur":
                                tensor = self.effects.blur_region(tensor, bbox)
                            elif action["action"] == "pixelate":
                                tensor = self.effects.pixelate_region(tensor, bbox, blocks=15, track_id=action["track_id"])

                        frame = self.effects.tensor_to_numpy(tensor)
                    else:
                        frame_h, frame_w = frame.shape[:2]
                        for action in frame_actions:
                            # Clip to the frame: negative or overflowing coordinates
                            # would otherwise produce empty/mismatched slices and
                            # make the OpenCV calls below fail.
                            clamped = self._clamp_bbox(action["bbox"], frame_w, frame_h)
                            if clamped is None:
                                continue
                            x1, y1, x2, y2 = clamped
                            roi = frame[y1:y2, x1:x2]
                            if action["action"] == "blur":
                                k = 31
                                frame[y1:y2, x1:x2] = cv2.GaussianBlur(roi, (k, k), 0)
                            elif action["action"] == "pixelate":
                                small = cv2.resize(roi, (15, 15))
                                frame[y1:y2, x1:x2] = cv2.resize(small, (x2-x1, y2-y1), interpolation=cv2.INTER_NEAREST)

                out.write(frame)

                if frame_num % 30 == 0:
                    pct = self._progress_pct(frame_num, total_frames)
                    await self.progress.update_progress(video_id, pct, frame_num, total_frames,
                        f"Anonimizando {frame_num}/{total_frames}")
        finally:
            # Always release both handles so the output file is finalized/closed
            cap.release()
            if out is not None:
                out.release()

        print(f"  Video guardado: {output_path}")

### 4. Ejecución del Pipeline
Lanzamos el proceso completo sobre el video de prueba.

In [None]:
# =============================================================================
# PIPELINE EXECUTION - HYBRID ARCHITECTURE WITH GDPR VERIFICATION
# =============================================================================

# Identifier and location of the test video processed by this cell
VIDEO_ID: str = "test_pipeline_hybrid"
VIDEO_PATH: str = "../storage/uploads/coche.mp4"

# Custom models (optional); None is forwarded as-is to VideoProcessor
CUSTOM_PERSON_MODEL: Optional[str] = None  # e.g., "../models/yolov10m.pt"
CUSTOM_PLATE_MODEL: Optional[str] = None   # e.g., "../models/yolo-lpr.pt"

# Module-level handle to the results JSON, assigned by run_pipeline()
pipeline_result_json: Optional[Dict[str, Any]] = None

async def run_pipeline():
    """
    Print the active configuration, run the full pipeline on VIDEO_PATH and
    return the results JSON.

    The result is also stored in the module-level ``pipeline_result_json``.
    Returns ``None`` without running anything when VIDEO_PATH does not exist.
    """
    global pipeline_result_json

    rule = "=" * 70

    # Header banner
    print(rule)
    print("OCCULTASHIELD - Pipeline de Anonimización GDPR")
    print("Arquitectura Híbrida: Kornia FaceDetector + YOLOv10")
    if VERIFICATION_MODULE_AVAILABLE:
        print("Verificación: Neo4j + Gemma 3n")
    else:
        print("Verificación: Fallback GDPR_CONFIG")
    print(rule)

    # Show the configuration reported by the GPU manager
    chosen_strategy, chosen_size, chosen_batch = gpu_manager.get_strategy()
    face_detector_label = 'Sí' if KORNIA_FACE_AVAILABLE else 'No (OpenCV fallback)'
    effects_label = 'Sí' if KORNIA_AVAILABLE else 'No (CPU fallback)'
    verification_label = 'Neo4j + Gemma 3n' if VERIFICATION_MODULE_AVAILABLE else 'GDPR_CONFIG fallback'

    print("\nConfiguración auto-adaptativa:")
    print(f"  Device: {gpu_manager.device_name}")
    print(f"  VRAM: {gpu_manager.vram_total_mb}MB")
    print(f"  Estrategia: {chosen_strategy}")
    print(f"  Tamaño modelos: {chosen_size}")
    print(f"  Batch size: {chosen_batch}")
    print(f"  Kornia FaceDetector: {face_detector_label}")
    print(f"  Kornia Effects: {effects_label}")
    print(f"  Verificación GDPR: {verification_label}")
    print()

    # Guard: nothing to do without the input video
    video_missing = not os.path.exists(VIDEO_PATH)
    if video_missing:
        print(f"[!] Video no encontrado: {VIDEO_PATH}")
        print("    Descarga un video de prueba para ejecutar el pipeline.")
        return None

    processor = VideoProcessor(
        person_model=CUSTOM_PERSON_MODEL,
        plate_model=CUSTOM_PLATE_MODEL
    )

    # Run the pipeline and keep its results JSON
    pipeline_result_json = await processor.process_full_pipeline(VIDEO_ID, VIDEO_PATH)

    # Static architecture overview (plain string: the {video_id} placeholders
    # are printed literally)
    architecture_overview = """
┌─────────────────────────────────────────────────────────────────────┐
│                    PIPELINE COMPLETO OCCULTASHIELD                  │
├─────────────────────────────────────────────────────────────────────┤
│                                                                     │
│  FASE 1: DETECCIÓN                                                  │
│  ├─ Caras: Kornia FaceDetector (YuNet) - GPU nativo                 │
│  ├─ Personas: YOLOv10 (nano/s/m según VRAM)                         │
│  └─ Matrículas: YOLO-LPR (opcional)                                 │
│                                                                     │
│  FASE 2: VERIFICACIÓN GDPR                                          │
│  ├─ GraphRAG: Neo4j con artículos GDPR                              │
│  ├─ LLM: Gemma 3n para análisis contextual                          │
│  └─ Fallback: GDPR_CONFIG si módulo no disponible                   │
│                                                                     │
│  FASE 3: GENERACIÓN JSON                                            │
│  ├─ Formulario de resultados completo                               │
│  ├─ Violaciones con artículos GDPR                                  │
│  └─ Recomendaciones de cumplimiento                                 │
│                                                                     │
│  FASE 4: ANONIMIZACIÓN                                              │
│  ├─ KorniaEffects: Blur/Pixelate GPU acelerado                      │
│  └─ Tracking consistente por ID                                     │
│                                                                     │
└─────────────────────────────────────────────────────────────────────┘

Archivos de salida:
  - Video: ../storage/processed/{video_id}_anonymized.mp4
  - JSON:  ../storage/processed/{video_id}_results.json
    """

    print("\n" + rule)
    print("ARQUITECTURA DEL PIPELINE")
    print(rule)
    print(architecture_overview)

    return pipeline_result_json

# Run the pipeline
result = asyncio.run(run_pipeline())

# Show a summary of the results JSON when one was produced
if result:
    stats = result.get('processing_stats', {})
    compliance = result.get('gdpr_compliance', {})

    print("\n" + "=" * 70)
    print("RESUMEN JSON DE RESULTADOS")
    print("=" * 70)
    print(f"  Video ID: {result.get('video_id')}")
    print(f"  Fecha: {result.get('processing_date')}")
    print(f"  Verificación: {result.get('verification_method')}")
    print(f"  Detecciones: {stats.get('detections_total', 0)}")
    print(f"  Violaciones: {stats.get('violations_total', 0)}")
    print(f"  Tiempo: {stats.get('processing_time_seconds', 0):.2f}s")
    print(f"\n  Artículos GDPR aplicables: {compliance.get('applicable_articles', [])}")
    print(f"  Requiere consentimiento: {compliance.get('requires_consent', False)}")

### 5. Ejecutar Módulo de Edición Independientemente

Esta celda permite ejecutar el módulo de edición (02) de forma independiente usando el JSON de resultados generado. Útil para:
- Re-procesar el video con diferentes configuraciones de efectos
- Aplicar anonimización después de revisar el JSON manualmente
- Testing y desarrollo del módulo de edición

In [None]:
# =============================================================================
# EJECUTAR MÓDULO DE EDICIÓN (02) INDEPENDIENTEMENTE
# =============================================================================
# Esta celda permite re-ejecutar la anonimización usando el JSON de resultados

async def run_edition_from_json(json_path: str, video_path: Optional[str] = None, output_path: Optional[str] = None) -> Optional[str]:
    """
    Run the edition step from an existing results JSON.

    NOTE: this is a simplified version. The results JSON does not carry
    per-frame bounding boxes, so frames are copied to the output unmodified;
    no anonymization effect is applied here.

    Args:
        json_path: Path to the pipeline results JSON.
        video_path: Path to the original video (optional; read from
            ``video_metadata.original_path`` in the JSON when omitted).
        output_path: Output path (optional; defaults to
            ``../storage/processed/<video_id>_reedited.mp4``).

    Returns:
        Path of the written video, or ``None`` when the input video is missing
        or cannot be opened, or the output writer cannot be created.
    """
    # Load the results JSON
    with open(json_path, 'r', encoding='utf-8') as f:
        result_json = json.load(f)

    # Resolve paths
    if video_path is None:
        video_path = result_json["video_metadata"]["original_path"]

    if output_path is None:
        video_id = result_json["video_id"]
        output_path = f"../storage/processed/{video_id}_reedited.mp4"

    print("=" * 70)
    print("MÓDULO DE EDICIÓN - Procesamiento desde JSON")
    print("=" * 70)
    print(f"  Video entrada: {video_path}")
    print(f"  Video salida: {output_path}")
    print(f"  Violaciones a procesar: {len(result_json['violations'])}")
    print()

    # Check that the video exists
    if not os.path.exists(video_path):
        print(f"[!] Video no encontrado: {video_path}")
        return None

    # Per-detection-type effect configuration.
    # NOTE: not consumed yet; kept as the intended settings for when the JSON
    # is extended with per-frame bounding boxes.
    EFFECT_CONFIG = {
        "face": {"action": "blur", "kernel_size": 31, "sigma": 15.0},
        "person": {"action": "blur", "kernel_size": 25, "sigma": 12.0},
        "license_plate": {"action": "pixelate", "blocks": 10},
        "fingerprint": {"action": "pixelate", "blocks": 8},
        "id_document": {"action": "blur", "kernel_size": 41, "sigma": 20.0},
        "credit_card": {"action": "pixelate", "blocks": 12},
        "signature": {"action": "blur", "kernel_size": 21, "sigma": 10.0},
    }

    # Open the video; an existing but unreadable file must not yield an empty output
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print(f"[!] No se pudo abrir el video: {video_path}")
        return None

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Prepare the writer
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            print(f"[!] No se pudo crear el video de salida: {output_path}")
            return None

        # The JSON stores frame_range but no per-frame bboxes. A complete
        # implementation would need to re-run detection or store more data.
        print("  [!] Nota: Esta versión simplificada aplica efectos en el rango de frames")
        print("      Para bboxes precisos por frame, usar el pipeline completo")
        print()

        print("  Procesando video...")

        frame_num = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_num += 1

            # Write the frame unmodified (simplified version); a complete
            # version would apply the effects here.
            out.write(frame)

            if frame_num % 100 == 0:
                # OpenCV may report a frame count of 0; avoid dividing by it
                pct = int(frame_num / total_frames * 100) if total_frames > 0 else 0
                print(f"    Progreso: {pct}% ({frame_num}/{total_frames})")
    finally:
        # Release both handles even if reading or writing fails
        cap.release()
        if out is not None:
            out.release()

    print(f"\n  [✓] Video guardado: {output_path}")
    print("\n  Para anonimización completa con bboxes precisos, ejecutar el pipeline completo")

    return output_path

# =============================================================================
# USAGE EXAMPLE
# =============================================================================
# Uncomment to run the edition step from an existing results JSON:

# json_result_path = f"../storage/processed/{VIDEO_ID}_results.json"
# if os.path.exists(json_result_path):
#     asyncio.run(run_edition_from_json(json_result_path))
# else:
#     print(f"JSON no encontrado: {json_result_path}")
#     print("Ejecuta primero el pipeline completo (celda anterior)")

# Confirmation that the cell was loaded, plus the call needed to run it
print(
    "Módulo de edición independiente cargado.",
    "Para ejecutar: asyncio.run(run_edition_from_json('path/to/results.json'))",
    sep="\n",
)