In [1]:
# Instalación de dependencias
!pip install transformers
!pip install datasets
!pip install decord
!pip install scikit-learn
!pip install matplotlib
!pip install seaborn
!pip install pandas
!pip install tqdm
!pip install scipy
!pip install torchmetrics
!pip install timm
!pip install av
!pip install einops
!pip install evaluate

Collecting decord
  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl.metadata (422 bytes)
Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: decord
Successfully installed decord-0.6.0
Collecting torchmetrics
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from t

In [2]:
# Importar bibliotecas necesarias
import os
import random
import math
import time
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torch.optim.lr_scheduler import OneCycleLR, CosineAnnealingLR
from torchvision import transforms
from torchmetrics.classification import BinaryAccuracy, BinaryPrecision, BinaryRecall, BinaryF1Score, BinarySpecificity
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, average_precision_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from transformers import TimesformerForVideoClassification, TimesformerConfig, AutoImageProcessor
from transformers import get_cosine_schedule_with_warmup
import decord
from decord import VideoReader, cpu
import av
import gc
import warnings
import random
import io
import zipfile
import logging
import json
from pathlib import Path


In [3]:
# Montar Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Configurar advertencias
warnings.filterwarnings('ignore')

# Configurar logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


# Verificar disponibilidad de GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

Usando dispositivo: cuda


In [5]:
# ============================== CONFIGURACIÓN DE HIPERPARÁMETROS ==============================

# Hiperparámetros generales
CONFIG = {
    # Rutas y nombres
    "dataset_path": "/content/drive/MyDrive/dataset_violencia",  # Ajustar según la ubicación real
    "output_dir": "/content/drive/MyDrive/Proyecto-Deteccion-Violencia/modelo_timesformer",
    "model_name": "timesformer_violence_detector",

    # Parámetros del modelo
    "pretrained_model": "facebook/timesformer-base-finetuned-k400",
    "num_frames": 8,              # Número de frames a procesar
    "image_size": 224,             # Tamaño de los frames (224x224)
    "num_classes": 2,              # Violencia / No violencia

    # Parámetros de entrenamiento - Transfer Learning
    "tl_batch_size": 8,            # Tamaño del batch
    "tl_num_epochs": 15,           # Número de épocas
    "tl_learning_rate": 5e-5,      # Learning rate inicial
    "tl_weight_decay": 1e-4,       # Regularización L2
    "tl_dropout": 0.2,             # Tasa de dropout
    "tl_warmup_ratio": 0.1,        # Proporción de steps para warmup

    # Parámetros de entrenamiento - Fine-Tuning
    "ft_batch_size": 4,            # Tamaño del batch (más pequeño para fine-tuning)
    "ft_num_epochs": 5,            # Número de épocas adicionales
    "ft_learning_rate": 1e-5,      # Learning rate más bajo para fine-tuning
    "ft_weight_decay": 5e-5,       # Regularización L2 suave

    # Umbral de clasificación
    "threshold": 0.6,              # Umbral de decisión para la clasificación

    # Configuración de checkpoints
    "save_steps": 100,             # Guardar cada X pasos
    "save_total_limit": 3,         # Máximo número de checkpoints a mantener
    "save_best_only": True,        # Guardar solo el mejor modelo

    # Métricas y evaluación
    "eval_steps": 50,              # Evaluar cada X pasos
    "logging_steps": 10,           # Mostrar métricas cada X pasos

    # Otros parámetros
    "seed": 42,                    # Semilla para reproducibilidad
    "mixed_precision": True,       # Usar precisión mixta para acelerar entrenamiento
}

# Crear directorio de salida si no existe
os.makedirs(CONFIG["output_dir"], exist_ok=True)

# Guardar configuración
with open(os.path.join(CONFIG["output_dir"], "config.json"), 'w') as f:
    json.dump(CONFIG, f, indent=4)

# Configurar reproducibilidad
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(CONFIG["seed"])

In [6]:
# ============================== CLASES PARA EL DATASET Y PROCESAMIENTO ==============================

# Clase para procesar y cargar los videos
class ViolenceVideoDataset(Dataset):
    def __init__(self, root_dir, split='train', transform=None, num_frames=16, image_size=224, max_videos=None):
        """
        Dataset para clasificación de violencia en videos

        Args:
            root_dir: Directorio raíz del dataset
            split: 'train', 'val' o 'test'
            transform: Transformaciones a aplicar
            num_frames: Número de frames a extraer de cada video
            image_size: Tamaño de los frames
            max_videos: Limitar número de videos (para pruebas rápidas)
        """
        self.root_dir = root_dir
        self.split = split
        self.transform = transform
        self.num_frames = num_frames
        self.image_size = image_size

        self.processor = AutoImageProcessor.from_pretrained(CONFIG["pretrained_model"])

        # Obtener las rutas de videos y etiquetas
        violence_dir = os.path.join(root_dir, split, 'violence')
        no_violence_dir = os.path.join(root_dir, split, 'no_violence')

        # Verificar que los directorios existan
        if not os.path.exists(violence_dir) or not os.path.exists(no_violence_dir):
            raise ValueError(f"No se encontraron los directorios del dataset en {root_dir}/{split}")

        violence_videos = glob.glob(os.path.join(violence_dir, '*.mp4'))
        no_violence_videos = glob.glob(os.path.join(no_violence_dir, '*.mp4'))

        if len(violence_videos) == 0 or len(no_violence_videos) == 0:
            raise ValueError(f"No se encontraron videos en {violence_dir} o {no_violence_dir}")

        # Limitar videos si es necesario
        if max_videos is not None:
            max_per_class = max_videos // 2
            violence_videos = violence_videos[:max_per_class]
            no_violence_videos = no_violence_videos[:max_per_class]

        self.video_paths = violence_videos + no_violence_videos
        self.labels = [1] * len(violence_videos) + [0] * len(no_violence_videos)

        # Mezclar los datos manteniendo correspondencia entre paths y labels
        combined = list(zip(self.video_paths, self.labels))
        random.shuffle(combined)
        self.video_paths, self.labels = zip(*combined)

        # Convertir a lista
        self.video_paths = list(self.video_paths)
        self.labels = list(self.labels)

        print(f"Cargados {len(self.video_paths)} videos para split '{split}'")
        print(f"Violencia: {len(violence_videos)}, No Violencia: {len(no_violence_videos)}")

    def __len__(self):
        return len(self.video_paths)

    def sample_frames_from_video(self, video_path):
        """Extrae frames uniformemente espaciados del video"""
        try:
            # Usar decord para cargar el video eficientemente
            video_reader = VideoReader(video_path, ctx=cpu(0))
            total_frames = len(video_reader)

            if total_frames == 0:
                raise ValueError(f"Video vacío o corrupto: {video_path}")

            # Seleccionar frames uniformemente
            indices = np.linspace(0, total_frames - 1, self.num_frames, dtype=int)
            frames = video_reader.get_batch(indices).asnumpy()  # (num_frames, H, W, C)

            # Aplicar resize y normalización
            processed_frames = []
            for frame in frames:
                # Redimensionar
                frame = transforms.functional.resize(
                    transforms.functional.to_tensor(frame),
                    (self.image_size, self.image_size)
                )
                processed_frames.append(frame)

            # Apilar frames
            frames_tensor = torch.stack(processed_frames)  # (T, C, H, W)

            # Mover dimensiones para coincidir con lo que espera el modelo (B, C, T, H, W)
            frames_tensor = frames_tensor.permute(1, 0, 2, 3).unsqueeze(0)

            return frames_tensor

        except Exception as e:
            logger.error(f"Error al procesar video {video_path}: {str(e)}")
            # Retornar un tensor de ceros en caso de error
            return torch.zeros((1, 3, self.num_frames, self.image_size, self.image_size))

    def __getitem__(self, idx):
        """Obtiene un item por su índice"""
        video_path = self.video_paths[idx]
        label = self.labels[idx]

        # Extraer frames
        frames = self.sample_frames_from_video(video_path)

        # Preprocesar frames usando el procesador de TimeSformer
        try:
            frames_list = list(frames.squeeze(0).permute(1, 0, 2, 3))  # Convertir a lista de tensores (T, C, H, W)
            inputs = self.processor(frames_list, return_tensors="pt", do_rescale=False)
            pixel_values = inputs['pixel_values'].squeeze(0)  # Eliminar dim de batch
        except Exception as e:
            logger.error(f"Error al procesar frames del video {video_path}: {str(e)}")
            # Crear input vacío de tamaño correcto en caso de error
            pixel_values = torch.zeros((3, self.num_frames, self.image_size, self.image_size))

        return {
            'pixel_values': pixel_values,
            'labels': torch.tensor(label, dtype=torch.long),
            'video_path': video_path
        }

# ============================== FUNCIONES DE ENTRENAMIENTO Y EVALUACIÓN ==============================

def train_epoch(model, dataloader, optimizer, scheduler, criterion, device, epoch, config):
    """Entrena el modelo durante una época completa"""
    model.train()
    epoch_loss = 0
    epoch_acc = 0
    epoch_precision = 0
    epoch_recall = 0
    epoch_f1 = 0

    # Métricas
    accuracy_metric = BinaryAccuracy().to(device)
    precision_metric = BinaryPrecision().to(device)
    recall_metric = BinaryRecall().to(device)
    f1_metric = BinaryF1Score().to(device)

    progress_bar = tqdm(enumerate(dataloader), total=len(dataloader), desc=f"Época {epoch+1}")

    for step, batch in progress_bar:
        try:
            # Mover datos al dispositivo
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['labels'].to(device)

            # Forward pass
            outputs = model(pixel_values=pixel_values, labels=labels)
            loss = outputs.loss

            # Backward pass
            optimizer.zero_grad()
            loss.backward()

            # Clip gradient norm para estabilidad
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()
            if scheduler is not None:
                scheduler.step()

            # Calcular métricas
            logits = outputs.logits
            preds = torch.sigmoid(logits[:, 1])  # Solo necesitamos la probabilidad de 'violencia'

            accuracy = accuracy_metric(preds, labels)
            precision = precision_metric(preds, labels)
            recall = recall_metric(preds, labels)
            f1 = f1_metric(preds, labels)

            # Acumular métricas
            epoch_loss += loss.item()
            epoch_acc += accuracy.item()
            epoch_precision += precision.item()
            epoch_recall += recall.item()
            epoch_f1 += f1.item()

            # Actualizar progreso
            progress_bar.set_postfix({
                'loss': loss.item(),
                'acc': accuracy.item(),
                'prec': precision.item(),
                'rec': recall.item(),
                'f1': f1.item()
            })

            # Liberar memoria explícitamente
            del pixel_values, labels, outputs, loss, logits, preds
            torch.cuda.empty_cache()

            # Guardar checkpoint cada ciertos pasos
            if (step + 1) % config["save_steps"] == 0:
                checkpoint_path = os.path.join(
                    config["output_dir"],
                    f"checkpoint_epoch{epoch+1}_step{step+1}.pt"
                )
                torch.save({
                    'epoch': epoch,
                    'step': step,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
                    'loss': loss.item(),
                }, checkpoint_path)
                logger.info(f"Guardado checkpoint en {checkpoint_path}")

        except Exception as e:
            logger.error(f"Error en paso {step}, época {epoch+1}: {str(e)}")
            # Intentar liberar memoria y continuar
            torch.cuda.empty_cache()
            continue

    # Calcular métricas promedio
    num_batches = len(dataloader)
    epoch_loss /= num_batches
    epoch_acc /= num_batches
    epoch_precision /= num_batches
    epoch_recall /= num_batches
    epoch_f1 /= num_batches

    return {
        'loss': epoch_loss,
        'accuracy': epoch_acc,
        'precision': epoch_precision,
        'recall': epoch_recall,
        'f1': epoch_f1
    }

def evaluate(model, dataloader, criterion, device, config):
    """Evalúa el modelo en un conjunto de datos"""
    model.eval()
    all_preds = []
    all_labels = []
    val_loss = 0

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluando"):
            try:
                # Mover datos al dispositivo
                pixel_values = batch['pixel_values'].to(device)
                labels = batch['labels'].to(device)

                # Forward pass
                outputs = model(pixel_values=pixel_values, labels=labels)
                loss = outputs.loss
                val_loss += loss.item()

                # Obtener predicciones
                logits = outputs.logits
                preds = torch.sigmoid(logits[:, 1])  # Solo la probabilidad de 'violencia'

                # Guardar predicciones y etiquetas
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                # Liberar memoria
                del pixel_values, labels, outputs, loss, logits, preds
                torch.cuda.empty_cache()

            except Exception as e:
                logger.error(f"Error al evaluar batch: {str(e)}")
                continue

    # Convertir a arrays numpy
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    if len(all_preds) == 0 or len(all_labels) == 0:
        logger.error("No se pudieron obtener predicciones o etiquetas durante la evaluación")
        return {
            'loss': float('inf'),
            'accuracy': 0,
            'precision': 0,
            'recall': 0,
            'specificity': 0,
            'f1': 0,
            'roc_auc': 0,
            'confusion_matrix': np.zeros((2, 2)),
            'fpr': np.array([0, 1]),
            'tpr': np.array([0, 0]),
            'predictions': np.array([]),
            'labels': np.array([])
        }

    # Calcular métricas
    binary_preds = (all_preds >= config["threshold"]).astype(int)

    accuracy = accuracy_score(all_labels, binary_preds)
    precision = precision_score(all_labels, binary_preds, zero_division=0)
    recall = recall_score(all_labels, binary_preds, zero_division=0)
    f1 = f1_score(all_labels, binary_preds, zero_division=0)

    # Calcular especificidad (TN / (TN + FP))
    tn, fp, fn, tp = confusion_matrix(all_labels, binary_preds, labels=[0, 1]).ravel()
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    # Calcular métricas de curva ROC
    try:
        fpr, tpr, _ = roc_curve(all_labels, all_preds)
        roc_auc = auc(fpr, tpr)
    except Exception as e:
        logger.error(f"Error al calcular curva ROC: {str(e)}")
        fpr, tpr = np.array([0, 1]), np.array([0, 0])
        roc_auc = 0

    # Matriz de confusión
    cm = confusion_matrix(all_labels, binary_preds, labels=[0, 1])

    # Pérdida promedio
    val_loss /= len(dataloader)

    # Crear informe de evaluación
    eval_results = {
        'loss': val_loss,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,  # Sensibilidad
        'specificity': specificity,
        'f1': f1,
        'roc_auc': roc_auc,
        'confusion_matrix': cm,
        'fpr': fpr,
        'tpr': tpr,
        'predictions': all_preds,
        'labels': all_labels
    }

    return eval_results

def plot_metrics(train_metrics, val_metrics, config):
    """Genera gráficos de métricas de entrenamiento"""
    metrics_to_plot = ['loss', 'accuracy', 'precision', 'recall', 'f1']
    epochs = range(1, len(train_metrics['loss']) + 1)

    plt.figure(figsize=(20, 15))

    for i, metric in enumerate(metrics_to_plot):
        plt.subplot(3, 2, i+1)
        plt.plot(epochs, train_metrics[metric], 'b-', label=f'Training {metric}')
        plt.plot(epochs, val_metrics[metric], 'r-', label=f'Validation {metric}')
        plt.title(f'{metric.capitalize()} vs. Epochs')
        plt.xlabel('Epochs')
        plt.ylabel(metric.capitalize())
        plt.legend()
        plt.grid(True)

    # Guardar figura
    plt.tight_layout()
    plt.savefig(os.path.join(config["output_dir"], "training_metrics.png"))
    plt.close()

def plot_confusion_matrix(cm, config, phase='transfer_learning'):
    """Visualiza la matriz de confusión"""
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['No Violencia', 'Violencia'],
                yticklabels=['No Violencia', 'Violencia'])
    plt.xlabel('Predicción')
    plt.ylabel('Real')
    plt.title('Matriz de Confusión')

    # Guardar figura
    plt.tight_layout()
    plt.savefig(os.path.join(config["output_dir"], f"confusion_matrix_{phase}.png"))
    plt.close()

def plot_roc_curve(fpr, tpr, roc_auc, config, phase='transfer_learning'):
    """Visualiza la curva ROC"""
    plt.figure(figsize=(10, 8))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.legend(loc="lower right")

    # Guardar figura
    plt.tight_layout()
    plt.savefig(os.path.join(config["output_dir"], f"roc_curve_{phase}.png"))
    plt.close()

def save_evaluation_report(eval_results, config, phase='transfer_learning'):
    """Guarda un informe detallado de la evaluación"""
    report = {
        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'phase': phase,
        'metrics': {
            'loss': float(eval_results['loss']),
            'accuracy': float(eval_results['accuracy']),
            'precision': float(eval_results['precision']),
            'recall': float(eval_results['recall']),
            'specificity': float(eval_results['specificity']),
            'f1_score': float(eval_results['f1']),
            'roc_auc': float(eval_results['roc_auc']),
        },
        'confusion_matrix': eval_results['confusion_matrix'].tolist(),
    }

    # Guardar informe en formato JSON
    with open(os.path.join(config["output_dir"], f"evaluation_report_{phase}.json"), 'w') as f:
        json.dump(report, f, indent=4)

    # También guardar en formato de texto para mejor legibilidad
    with open(os.path.join(config["output_dir"], f"evaluation_report_{phase}.txt"), 'w') as f:
        f.write(f"Evaluación del Modelo - Fase: {phase}\n")
        f.write(f"Fecha: {report['timestamp']}\n")
        f.write("\n=== Métricas ===\n")
        f.write(f"Loss: {report['metrics']['loss']:.4f}\n")
        f.write(f"Accuracy: {report['metrics']['accuracy']:.4f}\n")
        f.write(f"Precision: {report['metrics']['precision']:.4f}\n")
        f.write(f"Recall (Sensibilidad): {report['metrics']['recall']:.4f}\n")
        f.write(f"Specificity: {report['metrics']['specificity']:.4f}\n")
        f.write(f"F1-Score: {report['metrics']['f1_score']:.4f}\n")
        f.write(f"ROC AUC: {report['metrics']['roc_auc']:.4f}\n")
        f.write("\n=== Matriz de Confusión ===\n")
        f.write("                Pred: No Violencia  Pred: Violencia\n")
        f.write(f"Real: No Violencia    {eval_results['confusion_matrix'][0][0]}               {eval_results['confusion_matrix'][0][1]}\n")
        f.write(f"Real: Violencia       {eval_results['confusion_matrix'][1][0]}               {eval_results['confusion_matrix'][1][1]}\n")

In [None]:
# ============================== ENTRENAMIENTO CON TRANSFER LEARNING ==============================

logger.info("Iniciando fase de Transfer Learning")
print("Iniciando fase de Transfer Learning")

# 1. Cargar modelo pre-entrenado
model = TimesformerForVideoClassification.from_pretrained(
    CONFIG["pretrained_model"],
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"],
    num_labels=CONFIG["num_classes"],  # Añadir esto para configurar 2 clases desde el inicio
    ignore_mismatched_sizes=True
)

# 2. Asegurarnos de que la clasificación final tiene el número correcto de salidas
if hasattr(model, 'classifier'):
    if hasattr(model.classifier, 'out_features') and model.classifier.out_features != CONFIG["num_classes"]:
        # Guardar dimensión de entrada
        in_features = model.classifier.in_features

        # Reemplazar completamente el clasificador
        model.classifier = nn.Linear(in_features, CONFIG["num_classes"])

        logger.info(f"Reemplazada capa de clasificación: {in_features} -> {CONFIG['num_classes']}")
    elif isinstance(model.classifier, nn.Sequential):
        # Si ya es una secuencia, asegurarnos que la última capa tenga la salida correcta
        last_layer = model.classifier[-1]
        if hasattr(last_layer, 'out_features') and last_layer.out_features != CONFIG["num_classes"]:
            in_features = last_layer.in_features
            model.classifier[-1] = nn.Linear(in_features, CONFIG["num_classes"])
            logger.info(f"Reemplazada última capa de clasificación: {in_features} -> {CONFIG['num_classes']}")


# 3. Congelar los parámetros del modelo base (excepto los de la capa de clasificación)
for name, param in model.named_parameters():
    if 'classifier' not in name:  # Congelar todos los parámetros excepto los del clasificador
        param.requires_grad = False

# Verificar parámetros entrenables vs congelados
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
logger.info(f"Parámetros entrenables: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")
print(f"Parámetros entrenables: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")

# Mover modelo a GPU
model.to(device)

# 4. Preparar datasets y dataloaders
train_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='train',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

val_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='val',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=CONFIG["tl_batch_size"],
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

val_dataloader = DataLoader(
    val_dataset,
    batch_size=CONFIG["tl_batch_size"],
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

# 5. Configurar optimizador y scheduler
optimizer = optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=CONFIG["tl_learning_rate"],
    weight_decay=CONFIG["tl_weight_decay"]
)

# Calcular pasos totales para schedulers
num_training_steps = len(train_dataloader) * CONFIG["tl_num_epochs"]
num_warmup_steps = int(num_training_steps * CONFIG["tl_warmup_ratio"])

scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps
)

# 6. Criterio de pérdida (ya incluido en el modelo)
criterion = nn.CrossEntropyLoss()

# 7. Inicializar tracking de métricas
best_val_f1 = 0.0
train_metrics = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
val_metrics = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

# 8. Entrenamiento por épocas
for epoch in range(CONFIG["tl_num_epochs"]):
    logger.info(f"Iniciando época {epoch+1}/{CONFIG['tl_num_epochs']}")
    print(f"Iniciando época {epoch+1}/{CONFIG['tl_num_epochs']}")

    # Entrenamiento
    train_results = train_epoch(
        model=model,
        dataloader=train_dataloader,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=criterion,
        device=device,
        epoch=epoch,
        config=CONFIG
    )

    # Evaluación
    eval_results = evaluate(
        model=model,
        dataloader=val_dataloader,
        criterion=criterion,
        device=device,
        config=CONFIG
    )

    # Registrar métricas
    for metric in ['loss', 'accuracy', 'precision', 'recall', 'f1']:
        train_metrics[metric].append(train_results[metric])
        val_metrics[metric].append(eval_results[metric])

    # Mostrar resultados
    logger.info(f"Epoch {epoch+1}/{CONFIG['tl_num_epochs']} - "
               f"Train Loss: {train_results['loss']:.4f}, "
               f"Val Loss: {eval_results['loss']:.4f}, "
               f"Train Acc: {train_results['accuracy']:.4f}, "
               f"Val Acc: {eval_results['accuracy']:.4f}, "
               f"Val F1: {eval_results['f1']:.4f}")
    print(f"Epoch {epoch+1}/{CONFIG['tl_num_epochs']} - "
          f"Train Loss: {train_results['loss']:.4f}, "
          f"Val Loss: {eval_results['loss']:.4f}, "
          f"Train Acc: {train_results['accuracy']:.4f}, "
          f"Val Acc: {eval_results['accuracy']:.4f}, "
          f"Val F1: {eval_results['f1']:.4f}")

    # Guardar mejor modelo
    if eval_results['f1'] > best_val_f1:
        best_val_f1 = eval_results['f1']

        # Guardar modelo
        model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_tl.pt")
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'val_f1': best_val_f1,
            'config': CONFIG,
        }, model_path)

        logger.info(f"Guardado mejor modelo con F1: {best_val_f1:.4f} en {model_path}")
        print(f"Guardado mejor modelo con F1: {best_val_f1:.4f} en {model_path}")

    # Guardar checkpoint al final de cada época
    checkpoint_path = os.path.join(CONFIG["output_dir"], f"checkpoint_epoch{epoch+1}.pt")
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'train_metrics': train_metrics,
        'val_metrics': val_metrics,
    }, checkpoint_path)

    logger.info(f"Guardado checkpoint de época {epoch+1} en {checkpoint_path}")
    print(f"Guardado checkpoint de época {epoch+1} en {checkpoint_path}")

# 9. Visualizar y guardar métricas
plot_metrics(train_metrics, val_metrics, CONFIG)

# 10. Evaluación final del mejor modelo
# Cargar el mejor modelo
best_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_tl.pt")
checkpoint = torch.load(best_model_path)
model.load_state_dict(checkpoint['model_state_dict'])

logger.info(f"Evaluando mejor modelo de Transfer Learning (F1: {checkpoint['val_f1']:.4f})")
print(f"Evaluando mejor modelo de Transfer Learning (F1: {checkpoint['val_f1']:.4f})")

final_eval_results = evaluate(
    model=model,
    dataloader=val_dataloader,
    criterion=criterion,
    device=device,
    config=CONFIG
)

# Visualizar matriz de confusión
plot_confusion_matrix(final_eval_results['confusion_matrix'], CONFIG, phase='transfer_learning')

# Visualizar curva ROC
plot_roc_curve(
    final_eval_results['fpr'],
    final_eval_results['tpr'],
    final_eval_results['roc_auc'],
    CONFIG,
    phase='transfer_learning'
)

# Guardar informe detallado
save_evaluation_report(final_eval_results, CONFIG, phase='transfer_learning')

logger.info("Completada fase de Transfer Learning")
print("Completada fase de Transfer Learning")

# Guardar resultados para usarlos en etapas posteriores
tl_results = final_eval_results

Iniciando fase de Transfer Learning


Some weights of TimesformerForVideoClassification were not initialized from the model checkpoint at facebook/timesformer-base-finetuned-k400 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([400, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([400]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Parámetros entrenables: 1,538 / 121,260,290 (0.00%)


preprocessor_config.json:   0%|          | 0.00/412 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Cargados 8000 videos para split 'train'
Violencia: 4000, No Violencia: 4000
Cargados 1500 videos para split 'val'
Violencia: 750, No Violencia: 750
Iniciando época 1/15


Época 1:  10%|▉         | 99/1000 [03:57<21:12,  1.41s/it, loss=0.981, acc=0.25, prec=0.4, rec=0.4, f1=0.4]   ERROR:__main__:Error en paso 99, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  20%|█▉        | 199/1000 [06:19<33:15,  2.49s/it, loss=0.725, acc=0.375, prec=0.2, rec=0.5, f1=0.286]ERROR:__main__:Error en paso 199, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  30%|██▉       | 299/1000 [08:42<18:57,  1.62s/it, loss=0.988, acc=0.25, prec=0, rec=0, f1=0]      ERROR:__main__:Error en paso 299, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  40%|███▉      | 399/1000 [10:57<14:41,  1.47s/it, loss=1.03, acc=0.375, prec=0.5, rec=0.4, f1=0.444]ERROR:__main__:Error en paso 399, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  50%|████▉     | 499/1000 [13:19<11:02,  1.32s/it, loss=0.644, acc=0.25, prec

Epoch 1/15 - Train Loss: 0.6755, Val Loss: 0.3881, Train Acc: 0.4586, Val Acc: 0.6793, Val F1: 0.5575
Guardado mejor modelo con F1: 0.5575 en /content/drive/MyDrive/Proyecto-Deteccion-Violencia/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 1 en /content/drive/MyDrive/Proyecto-Deteccion-Violencia/modelo_timesformer/checkpoint_epoch1.pt
Iniciando época 2/15


Época 2:  10%|▉         | 99/1000 [01:55<17:12,  1.15s/it, loss=0.353, acc=0.875, prec=1, rec=0.8, f1=0.889]    ERROR:__main__:Error en paso 99, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  20%|█▉        | 199/1000 [03:48<15:08,  1.13s/it, loss=0.458, acc=0.75, prec=1, rec=0.667, f1=0.8]ERROR:__main__:Error en paso 199, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  30%|██▉       | 299/1000 [05:41<13:09,  1.13s/it, loss=0.234, acc=1, prec=1, rec=1, f1=1]         ERROR:__main__:Error en paso 299, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  40%|███▉      | 399/1000 [07:34<11:15,  1.12s/it, loss=0.206, acc=0.875, prec=1, rec=0.667, f1=0.8]    ERROR:__main__:Error en paso 399, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  50%|████▉     | 499/1000 [09:27<09:23,  1.13s/it, loss=0.198, acc=0.75, pr

Epoch 2/15 - Train Loss: 0.2773, Val Loss: 0.2023, Train Acc: 0.8586, Val Acc: 0.8840, Val F1: 0.8721
Guardado mejor modelo con F1: 0.8721 en /content/drive/MyDrive/Proyecto-Deteccion-Violencia/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 2 en /content/drive/MyDrive/Proyecto-Deteccion-Violencia/modelo_timesformer/checkpoint_epoch2.pt
Iniciando época 3/15


Época 3:  10%|▉         | 99/1000 [01:58<17:09,  1.14s/it, loss=0.077, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 99, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  20%|█▉        | 199/1000 [03:52<15:31,  1.16s/it, loss=0.272, acc=0.875, prec=0.857, rec=1, f1=0.923]ERROR:__main__:Error en paso 199, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  30%|██▉       | 299/1000 [05:44<13:12,  1.13s/it, loss=0.181, acc=1, prec=1, rec=1, f1=1]            ERROR:__main__:Error en paso 299, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  40%|███▉      | 399/1000 [07:38<11:20,  1.13s/it, loss=0.135, acc=0.875, prec=1, rec=0.833, f1=0.909]ERROR:__main__:Error en paso 399, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  50%|████▉     | 499/1000 [09:30<09:25,  1.13s/it, loss=0.321, acc=0.875, prec=0.75,

Epoch 3/15 - Train Loss: 0.1906, Val Loss: 0.1640, Train Acc: 0.9140, Val Acc: 0.9120, Val F1: 0.9060
Guardado mejor modelo con F1: 0.9060 en /content/drive/MyDrive/Proyecto-Deteccion-Violencia/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 3 en /content/drive/MyDrive/Proyecto-Deteccion-Violencia/modelo_timesformer/checkpoint_epoch3.pt
Iniciando época 4/15


Época 4:  10%|▉         | 99/1000 [01:58<17:21,  1.16s/it, loss=0.301, acc=0.875, prec=0.75, rec=1, f1=0.857]ERROR:__main__:Error en paso 99, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  20%|█▉        | 199/1000 [03:51<15:10,  1.14s/it, loss=0.504, acc=0.875, prec=0.833, rec=1, f1=0.909]  ERROR:__main__:Error en paso 199, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  30%|██▉       | 299/1000 [05:45<13:09,  1.13s/it, loss=0.324, acc=0.75, prec=0.667, rec=1, f1=0.8] ERROR:__main__:Error en paso 299, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  38%|███▊      | 376/1000 [07:10<11:41,  1.12s/it, loss=0.00365, acc=1, prec=1, rec=1, f1=1]

In [None]:
# ============================== FINE TUNING ==============================

logger.info("Iniciando fase de Fine-Tuning")

# Cargar el mejor modelo de Transfer Learning
best_tl_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_tl.pt")
checkpoint = torch.load(best_tl_model_path)

# Comprobar si ya tenemos el modelo cargado (de la celda anterior) o necesitamos cargarlo
try:
    # Intentar acceder al modelo, si no está definido, lanzará una excepción
    model
    logger.info("Usando modelo ya cargado de celda anterior")
    # Cargar estado del modelo desde checkpoint
    model.load_state_dict(checkpoint['model_state_dict'])
except NameError:
    # Si el modelo no está definido, crear uno nuevo y cargarlo
    logger.info("Cargando modelo desde checkpoint")
    model = TimesformerForVideoClassification.from_pretrained(
        CONFIG["pretrained_model"],
        num_frames=CONFIG["num_frames"],
        image_size=CONFIG["image_size"],
    )

    # Adaptarlo a nuestra tarea si es necesario
    if model.classifier.out_features != CONFIG["num_classes"]:
        model.classifier = nn.Sequential(
            nn.Dropout(CONFIG["tl_dropout"]),
            nn.Linear(model.classifier.in_features, CONFIG["num_classes"])
        )

    # Cargar estado del modelo desde checkpoint
    model.load_state_dict(checkpoint['model_state_dict'])

# 1. Descongelar todos los parámetros del modelo
for param in model.parameters():
    param.requires_grad = True

# Verificar parámetros entrenables
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
logger.info(f"Parámetros entrenables: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")

# Asegurar que el modelo está en el dispositivo correcto
model.to(device)

# 2. Preparar datasets y dataloaders (mismos que antes)
train_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='train',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

val_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='val',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=CONFIG["ft_batch_size"],  # Tamaño de batch más pequeño para fine-tuning
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

val_dataloader = DataLoader(
    val_dataset,
    batch_size=CONFIG["ft_batch_size"],
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

# 3. Configurar optimizador con learning rate diferenciado
# Usar learning rates más pequeños para capas base y más grandes para capas superiores
param_groups = [
    {
        'params': [p for n, p in model.named_parameters() if 'classifier' not in n],
        'lr': CONFIG["ft_learning_rate"] * 0.1  # LR más bajo para el backbone
    },
    {
        'params': [p for n, p in model.named_parameters() if 'classifier' in n],
        'lr': CONFIG["ft_learning_rate"]  # LR normal para el clasificador
    }
]

optimizer = optim.AdamW(
    param_groups,
    weight_decay=CONFIG["ft_weight_decay"]
)

# Calcular pasos totales para scheduler
num_training_steps = len(train_dataloader) * CONFIG["ft_num_epochs"]

# Scheduler con cosine annealing
scheduler = CosineAnnealingLR(
    optimizer,
    T_max=num_training_steps,
    eta_min=1e-6
)

# 4. Criterio de pérdida (ya incluido en el modelo)
criterion = nn.CrossEntropyLoss()

# 5. Inicializar tracking de métricas
best_val_f1 = 0.0
train_metrics = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
val_metrics = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

# 6. Entrenamiento por épocas
for epoch in range(CONFIG["ft_num_epochs"]):
    logger.info(f"Iniciando época {epoch+1}/{CONFIG['ft_num_epochs']} (fine-tuning)")

    # Entrenamiento
    train_results = train_epoch(
        model=model,
        dataloader=train_dataloader,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=criterion,
        device=device,
        epoch=epoch,
        config=CONFIG
    )

    # Evaluación
    eval_results = evaluate(
        model=model,
        dataloader=val_dataloader,
        criterion=criterion,
        device=device,
        config=CONFIG
    )

    # Registrar métricas
    for metric in ['loss', 'accuracy', 'precision', 'recall', 'f1']:
        train_metrics[metric].append(train_results[metric])
        val_metrics[metric].append(eval_results[metric])

    # Mostrar resultados
    logger.info(f"Epoch {epoch+1}/{CONFIG['ft_num_epochs']} (FT) - "
               f"Train Loss: {train_results['loss']:.4f}, "
               f"Val Loss: {eval_results['loss']:.4f}, "
               f"Train Acc: {train_results['accuracy']:.4f}, "
               f"Val Acc: {eval_results['accuracy']:.4f}, "
               f"Val F1: {eval_results['f1']:.4f}")

    # Guardar mejor modelo
    if eval_results['f1'] > best_val_f1:
        best_val_f1 = eval_results['f1']

        # Guardar modelo
        model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt")
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'val_f1': best_val_f1,
            'config': CONFIG,
        }, model_path)

        logger.info(f"Guardado mejor modelo (FT) con F1: {best_val_f1:.4f} en {model_path}")

    # Guardar checkpoint al final de cada época
    checkpoint_path = os.path.join(CONFIG["output_dir"], f"checkpoint_ft_epoch{epoch+1}.pt")
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'train_metrics': train_metrics,
        'val_metrics': val_metrics,
    }, checkpoint_path)

    logger.info(f"Guardado checkpoint de fine-tuning época {epoch+1} en {checkpoint_path}")

# 7. Visualizar y guardar métricas
# Crear nuevas visualizaciones para fine-tuning
plt.figure(figsize=(20, 15))

metrics_to_plot = ['loss', 'accuracy', 'precision', 'recall', 'f1']
epochs = range(1, len(train_metrics['loss']) + 1)

for i, metric in enumerate(metrics_to_plot):
    plt.subplot(3, 2, i+1)
    plt.plot(epochs, train_metrics[metric], 'b-', label=f'Training {metric}')
    plt.plot(epochs, val_metrics[metric], 'r-', label=f'Validation {metric}')
    plt.title(f'{metric.capitalize()} vs. Epochs (Fine-Tuning)')
    plt.xlabel('Epochs')
    plt.ylabel(metric.capitalize())
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.savefig(os.path.join(CONFIG["output_dir"], "fine_tuning_metrics.png"))
plt.close()

# 8. Evaluación final del mejor modelo
# Cargar el mejor modelo
best_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt")
checkpoint = torch.load(best_model_path)
model.load_state_dict(checkpoint['model_state_dict'])

logger.info(f"Evaluando mejor modelo de Fine-Tuning (F1: {checkpoint['val_f1']:.4f})")

final_eval_results = evaluate(
    model=model,
    dataloader=val_dataloader,
    criterion=criterion,
    device=device,
    config=CONFIG
)

# Visualizar matriz de confusión
plot_confusion_matrix(final_eval_results['confusion_matrix'], CONFIG, phase='fine_tuning')

# Visualizar curva ROC
plot_roc_curve(
    final_eval_results['fpr'],
    final_eval_results['tpr'],
    final_eval_results['roc_auc'],
    CONFIG,
    phase='fine_tuning'
)

# Guardar informe detallado
save_evaluation_report(final_eval_results, CONFIG, phase='fine_tuning')

logger.info("Completada fase de Fine-Tuning")

# Guardar resultados para usarlos en etapas posteriores
ft_results = final_eval_results

In [None]:
# ============================== EVALUACIÓN EN CONJUNTO DE PRUEBA ==============================

logger.info("Evaluando modelo en conjunto de prueba")

# Cargar el mejor modelo de Fine-Tuning
best_ft_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt")

# Comprobar si ya tenemos el modelo cargado
try:
    # Intentar acceder al modelo
    model
    logger.info("Usando modelo ya cargado de celda anterior")
    # Cargar estado del mejor modelo de fine-tuning
    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
except NameError:
    # Si el modelo no está definido, crear uno nuevo y cargarlo
    logger.info("Cargando modelo desde checkpoint")
    model = TimesformerForVideoClassification.from_pretrained(
        CONFIG["pretrained_model"],
        num_frames=CONFIG["num_frames"],
        image_size=CONFIG["image_size"],
    )

    # Adaptarlo a nuestra tarea
    if model.classifier.out_features != CONFIG["num_classes"]:
        model.classifier = nn.Sequential(
            nn.Dropout(CONFIG["tl_dropout"]),
            nn.Linear(model.classifier.in_features, CONFIG["num_classes"])
        )

    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])

# Asegurar que el modelo está en el dispositivo correcto
model.to(device)
model.eval()

# Cargar dataset de prueba
test_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='test',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=CONFIG["ft_batch_size"],
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

# Evaluar
criterion = nn.CrossEntropyLoss()
test_results = evaluate(
    model=model,
    dataloader=test_dataloader,
    criterion=criterion,
    device=device,
    config=CONFIG
)

# Visualizar matriz de confusión
plot_confusion_matrix(test_results['confusion_matrix'], CONFIG, phase='test')

# Visualizar curva ROC
plot_roc_curve(
    test_results['fpr'],
    test_results['tpr'],
    test_results['roc_auc'],
    CONFIG,
    phase='test'
)

# Generar y guardar reporte detallado
save_evaluation_report(test_results, CONFIG, phase='test')

# Métricas adicionales: Precision-Recall curve
precision, recall, _ = precision_recall_curve(
    test_results['labels'],
    test_results['predictions']
)
pr_auc = average_precision_score(test_results['labels'], test_results['predictions'])

# Graficar curva Precision-Recall
plt.figure(figsize=(10, 8))
plt.plot(recall, precision, color='blue', lw=2, label=f'PR curve (AP = {pr_auc:.2f})')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend(loc="lower left")
plt.grid(True)
plt.savefig(os.path.join(CONFIG["output_dir"], "precision_recall_curve_test.png"))
plt.close()

# Análisis de mejores umbrales
# Calcular métricas para diferentes umbrales
thresholds = np.linspace(0.1, 0.9, 9)
threshold_metrics = []

for threshold in thresholds:
    binary_preds = (test_results['predictions'] >= threshold).astype(int)

    acc = accuracy_score(test_results['labels'], binary_preds)
    prec = precision_score(test_results['labels'], binary_preds, zero_division=0)
    rec = recall_score(test_results['labels'], binary_preds, zero_division=0)
    f1 = f1_score(test_results['labels'], binary_preds, zero_division=0)

    threshold_metrics.append({
        'threshold': threshold,
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1
    })

# Convertir a DataFrame para mejor visualización
threshold_df = pd.DataFrame(threshold_metrics)

# Graficar métricas vs umbral
plt.figure(figsize=(12, 8))
for metric in ['accuracy', 'precision', 'recall', 'f1']:
    plt.plot(threshold_df['threshold'], threshold_df[metric], marker='o', label=metric)

plt.xlabel('Umbral de decisión')
plt.ylabel('Valor de métrica')
plt.title('Métricas vs Umbral de decisión')
plt.legend()
plt.grid(True)
plt.savefig(os.path.join(CONFIG["output_dir"], "threshold_analysis.png"))
plt.close()

# Encontrar mejor umbral según F1
best_threshold_idx = threshold_df['f1'].idxmax()
best_threshold = threshold_df.loc[best_threshold_idx, 'threshold']

logger.info(f"Mejor umbral encontrado: {best_threshold:.2f} con F1: {threshold_df.loc[best_threshold_idx, 'f1']:.4f}")

# Guardar análisis de umbrales
threshold_df.to_csv(os.path.join(CONFIG["output_dir"], "threshold_analysis.csv"), index=False)

# Actualizar el umbral en la configuración
CONFIG["threshold"] = float(best_threshold)
with open(os.path.join(CONFIG["output_dir"], "config.json"), 'w') as f:
    json.dump(CONFIG, f, indent=4)

# Calcular y visualizar curva ROC detallada con punto óptimo
fpr, tpr, thresholds_roc = roc_curve(test_results['labels'], test_results['predictions'])
roc_auc = auc(fpr, tpr)

# Calcular distancia al punto óptimo (0,1)
distances = np.sqrt((1-tpr)**2 + fpr**2)
optimal_idx = np.argmin(distances)
optimal_threshold = thresholds_roc[optimal_idx]

plt.figure(figsize=(10, 8))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.scatter(fpr[optimal_idx], tpr[optimal_idx], marker='o', color='red',
            label=f'Punto óptimo (umbral={optimal_threshold:.2f})')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos')
plt.ylabel('Tasa de Verdaderos Positivos')
plt.title('Curva ROC con punto óptimo')
plt.legend(loc="lower right")
plt.grid(True)
plt.savefig(os.path.join(CONFIG["output_dir"], "roc_curve_optimal_test.png"))
plt.close()

logger.info(f"Umbral óptimo según distancia a punto ideal en ROC: {optimal_threshold:.4f}")

# Guardar este umbral también
with open(os.path.join(CONFIG["output_dir"], "optimal_thresholds.json"), 'w') as f:
    json.dump({
        'f1_optimal': float(best_threshold),
        'roc_optimal': float(optimal_threshold)
    }, f, indent=4)

# Mostrar resumen de resultados
logger.info(f"Resumen de evaluación en conjunto de prueba:")
logger.info(f"Accuracy: {test_results['accuracy']:.4f}")
logger.info(f"Precision: {test_results['precision']:.4f}")
logger.info(f"Recall (Sensibilidad): {test_results['recall']:.4f}")
logger.info(f"Specificity: {test_results['specificity']:.4f}")
logger.info(f"F1-Score: {test_results['f1']:.4f}")
logger.info(f"ROC AUC: {test_results['roc_auc']:.4f}")
logger.info(f"PR AUC: {pr_auc:.4f}")

In [None]:
# ============================== EXPORTACIÓN DEL MODELO ==============================

logger.info("Exportando modelo para inferencia")

# Comprobar si ya tenemos el modelo cargado
try:
    # Intentar acceder al modelo
    model
    # Asegurarse de que tiene cargado el mejor modelo de fine-tuning
    best_ft_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt")
    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
except NameError:
    # Si el modelo no está definido, crear uno nuevo y cargarlo
    logger.info("Cargando modelo desde checkpoint")
    model = TimesformerForVideoClassification.from_pretrained(
        CONFIG["pretrained_model"],
        num_frames=CONFIG["num_frames"],
        image_size=CONFIG["image_size"],
    )

    # Adaptarlo a nuestra tarea
    if model.classifier.out_features != CONFIG["num_classes"]:
        model.classifier = nn.Sequential(
            nn.Dropout(CONFIG["tl_dropout"]),
            nn.Linear(model.classifier.in_features, CONFIG["num_classes"])
        )

    best_ft_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt")
    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])

# Asegurar que el modelo está en el dispositivo correcto
model.to(device)
model.eval()

# 1. Guardar modelo en formato PyTorch
model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_final.pt")
torch.save({
    'model_state_dict': model.state_dict(),
    'config': CONFIG,
}, model_path)

logger.info(f"Modelo guardado en formato PyTorch: {model_path}")

# 2. Crear modelo para inferencia sin calcular pérdidas (más eficiente)
class TimesformerInference(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.timesformer = model

    def forward(self, pixel_values):
        outputs = self.timesformer(pixel_values=pixel_values)
        logits = outputs.logits
        # Aplicar sigmoid para obtener probabilidades para la clase 'violencia'
        probs = torch.sigmoid(logits[:, 1])
        return probs

inference_model = TimesformerInference(model)
inference_model.eval()

# Guardar modelo de inferencia
inference_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_inference.pt")
torch.save(inference_model, inference_model_path)

logger.info(f"Modelo de inferencia guardado: {inference_model_path}")

# 3. Guardar también usando save_pretrained de Hugging Face
save_dir = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_hf")
os.makedirs(save_dir, exist_ok=True)

# Guardar modelo y procesador
try:
    model.save_pretrained(save_dir)
    processor = AutoImageProcessor.from_pretrained(CONFIG["pretrained_model"])
    processor.save_pretrained(save_dir)
    logger.info(f"Modelo y procesador guardados en formato Hugging Face: {save_dir}")
except Exception as e:
    logger.warning(f"Error al guardar modelo con save_pretrained: {str(e)}")
    logger.info("Utilizando método alternativo para guardar en formato HF")

    # Método alternativo
    if not os.path.exists(os.path.join(save_dir, "config.json")):
        config_obj = TimesformerConfig.from_pretrained(CONFIG["pretrained_model"])
        config_obj.num_frames = CONFIG["num_frames"]
        config_obj.image_size = CONFIG["image_size"]
        config_obj.num_labels = CONFIG["num_classes"]
        config_obj.save_pretrained(save_dir)

    if not os.path.exists(os.path.join(save_dir, "pytorch_model.bin")):
        torch.save(model.state_dict(), os.path.join(save_dir, "pytorch_model.bin"))

    # Guardar procesador
    processor = AutoImageProcessor.from_pretrained(CONFIG["pretrained_model"])
    if not os.path.exists(os.path.join(save_dir, "preprocessor_config.json")):
        processor.save_pretrained(save_dir)

    logger.info(f"Modelo y procesador guardados en formato alternativo: {save_dir}")

# 4. Crear script de ejemplo para inferencia
inference_script = """
import torch
import torch.nn as nn
from transformers import TimesformerForVideoClassification, AutoImageProcessor
from decord import VideoReader, cpu
import numpy as np
from torchvision import transforms
import os
import json

def load_model(model_path, config_path=None):
    """Carga el modelo de detección de violencia"""
    # Cargar configuración si se proporciona
    if config_path:
        with open(config_path, 'r') as f:
            config = json.load(f)
    else:
        # Intentar encontrar config.json en el mismo directorio
        config_dir = os.path.dirname(model_path)
        config_path = os.path.join(config_dir, "config.json")
        if os.path.exists(config_path):
            with open(config_path, 'r') as f:
                config = json.load(f)
        else:
            # Valores por defecto
            config = {
                "num_frames": 16,
                "image_size": 224,
                "threshold": 0.5
            }

    # Opción 1: Cargar modelo guardado con HF save_pretrained
    if os.path.isdir(model_path):
        model = TimesformerForVideoClassification.from_pretrained(model_path)
        processor = AutoImageProcessor.from_pretrained(model_path)
        return model, processor, config

    # Opción 2: Cargar modelo guardado con torch.save
    checkpoint = torch.load(model_path, map_location="cpu")

    if isinstance(checkpoint, torch.nn.Module):
        # Es un modelo entero guardado con torch.save(model)
        return checkpoint, None, config

    # Es un diccionario con state_dict
    if "model_state_dict" in checkpoint:
        model = TimesformerForVideoClassification.from_pretrained(
            "facebook/timesformer-base-finetuned-k400",
            num_frames=config["num_frames"],
            image_size=config["image_size"],
        )
        model.load_state_dict(checkpoint["model_state_dict"])
        processor = AutoImageProcessor.from_pretrained("facebook/timesformer-base-finetuned-k400")
        return model, processor, config

    raise ValueError("Formato de modelo no reconocido")

def process_video(video_path, model, processor=None, config=None):
    """Procesa un video y detecta violencia"""
    if config is None:
        config = {
            "num_frames": 16,
            "image_size": 224,
            "threshold": 0.5
        }

    # Cargar video con decord
    video_reader = VideoReader(video_path, ctx=cpu(0))
    total_frames = len(video_reader)

    # Seleccionar frames uniformemente
    indices = np.linspace(0, total_frames - 1, config["num_frames"], dtype=int)
    frames = video_reader.get_batch(indices).asnumpy()  # (num_frames, H, W, C)

    # Preprocesar frames
    processed_frames = []
    for frame in frames:
        # Redimensionar
        frame = transforms.functional.resize(
            transforms.functional.to_tensor(frame),
            (config["image_size"], config["image_size"])
        )
        processed_frames.append(frame)

    # Apilar frames
    frames_tensor = torch.stack(processed_frames)  # (T, C, H, W)

    # Preprocesar con el procesador si está disponible
    if processor:
        inputs = processor(
            list(frames_tensor),  # Lista de tensores (T, C, H, W)
            return_tensors="pt"
        )
        pixel_values = inputs['pixel_values']
    else:
        # Formato alternativo si no hay procesador
        frames_tensor = frames_tensor.permute(1, 0, 2, 3).unsqueeze(0)  # (1, C, T, H, W)
        pixel_values = frames_tensor

    # Inferencia
    model.eval()
    with torch.no_grad():
        # Comprobar si es modelo de inferencia personalizado
        if isinstance(model, nn.Module) and hasattr(model, 'timesformer'):
            # Modelo de inferencia que devuelve directamente probabilidades
            violence_prob = model(pixel_values)
        else:
            # Modelo TimesformerForVideoClassification estándar
            outputs = model(pixel_values=pixel_values)
            # Continuación del script de inferencia
            logits = outputs.logits
            violence_prob = torch.sigmoid(logits[:, 1])

    # Determinar predicción según umbral
    is_violent = violence_prob.item() >= config["threshold"]

    return {
        "is_violent": is_violent,
        "violence_probability": violence_prob.item(),
        "threshold": config["threshold"]
    }

# Ejemplo de uso
if __name__ == "__main__":
    # Ruta al modelo guardado
    model_path = "ruta/al/modelo_timesformer_inference.pt"
    config_path = "ruta/al/config.json"

    # Cargar modelo
    model, processor, config = load_model(model_path, config_path)

    # Ruta al video a procesar
    video_path = "ruta/al/video_ejemplo.mp4"

    # Procesar video
    result = process_video(video_path, model, processor, config)

    # Mostrar resultado
    print(f"¿Detectada violencia?: {'Sí' if result['is_violent'] else 'No'}")
    print(f"Probabilidad de violencia: {result['violence_probability']:.4f}")
"""

# Guardar script de ejemplo
with open(os.path.join(CONFIG["output_dir"], "inference_example.py"), 'w') as f:
    f.write(inference_script)

logger.info(f"Script de ejemplo para inferencia guardado en: {os.path.join(CONFIG['output_dir'], 'inference_example.py')}")

# 5. Exportar modelo en formato ONNX para inferencia más rápida
try:
    # Generar un input de ejemplo
    dummy_input = torch.randn(1, 3, CONFIG["num_frames"], CONFIG["image_size"], CONFIG["image_size"]).to(device)

    # Configurar rutas
    onnx_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}.onnx")

    # Exportar a ONNX
    torch.onnx.export(
        inference_model,
        dummy_input,
        onnx_path,
        export_params=True,
        opset_version=12,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
    )

    logger.info(f"Modelo exportado en formato ONNX: {onnx_path}")
except Exception as e:
    logger.warning(f"No se pudo exportar a ONNX: {str(e)}")
    logger.info("Puede ser debido a compatibilidad con la arquitectura. Esto no afectará al uso del modelo en PyTorch.")

# Mostrar rutas de los modelos exportados
export_paths = {
    'pytorch_model': model_path,
    'inference_model': inference_model_path,
    'huggingface_model': save_dir,
    'onnx_model': os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}.onnx"),
    'config': os.path.join(CONFIG["output_dir"], "config.json"),
    'inference_script': os.path.join(CONFIG["output_dir"], "inference_example.py")
}

logger.info("Rutas de los modelos exportados:")
for key, path in export_paths.items():
    logger.info(f"  - {key}: {path}")

In [None]:
# ============================== PRUEBA DE INFERENCIA ==============================

logger.info("Realizando pruebas de inferencia en muestras")

# Comprobar si ya tenemos el modelo cargado
try:
    # Intentar acceder al modelo
    model
    # Asegurarse de que tiene cargado el mejor modelo de fine-tuning
    best_ft_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt")
    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
except NameError:
    # Si el modelo no está definido, crear uno nuevo y cargarlo
    logger.info("Cargando modelo desde checkpoint")
    model = TimesformerForVideoClassification.from_pretrained(
        CONFIG["pretrained_model"],
        num_frames=CONFIG["num_frames"],
        image_size=CONFIG["image_size"],
    )

    # Adaptarlo a nuestra tarea
    if model.classifier.out_features != CONFIG["num_classes"]:
        model.classifier = nn.Sequential(
            nn.Dropout(CONFIG["tl_dropout"]),
            nn.Linear(model.classifier.in_features, CONFIG["num_classes"])
        )

    best_ft_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt")
    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])

# Asegurar que el modelo está en el dispositivo correcto
model.to(device)
model.eval()

# Cargar dataset de prueba
test_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='test',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

# Seleccionar algunas muestras aleatorias
num_samples = min(5, len(test_dataset))
sample_indices = random.sample(range(len(test_dataset)), num_samples)

# Resultados
results = []

# Crear figura para visualización
fig, axes = plt.subplots(num_samples, 2, figsize=(12, 4*num_samples))
if num_samples == 1:
    axes = axes.reshape(1, 2)

for i, idx in enumerate(sample_indices):
    try:
        # Obtener muestra
        sample = test_dataset[idx]
        pixel_values = sample['pixel_values'].unsqueeze(0).to(device)  # Añadir dimensión de batch
        label = sample['labels'].item()
        video_path = sample['video_path']

        # Inferencia
        with torch.no_grad():
            outputs = model(pixel_values=pixel_values)
            logits = outputs.logits
            probs = torch.softmax(logits, dim=1)
            violence_prob = probs[0, 1].item()
            prediction = violence_prob >= CONFIG["threshold"]

        # Extraer un frame para visualización
        video_reader = VideoReader(video_path, ctx=cpu(0))
        mid_frame_idx = len(video_reader) // 2
        frame = video_reader[mid_frame_idx].asnumpy()

        # Guardar resultado
        results.append({
            'video_path': video_path,
            'true_label': label,
            'violence_prob': violence_prob,
            'prediction': prediction,
            'correct': (prediction == label)
        })

        # Visualizar
        axes[i, 0].imshow(frame)
        axes[i, 0].set_title(f"Video: {os.path.basename(video_path)}")
        axes[i, 0].axis('off')

        # Graficar probabilidad
        bar_colors = ['green', 'red']
        class_names = ['No Violencia', 'Violencia']
        class_probs = [1 - violence_prob, violence_prob]

        axes[i, 1].barh(class_names, class_probs, color=bar_colors)
        axes[i, 1].set_xlim(0, 1)
        axes[i, 1].set_title(f"Predicción: {'Violencia' if prediction else 'No Violencia'} " +
                          f"(Real: {'Violencia' if label else 'No Violencia'})")
        axes[i, 1].axvline(x=CONFIG["threshold"], color='black', linestyle='--',
                      label=f'Umbral: {CONFIG["threshold"]:.2f}')
        axes[i, 1].legend()

    except Exception as e:
        logger.error(f"Error al procesar muestra {idx}: {str(e)}")
        # En caso de error, dejar la posición vacía
        axes[i, 0].axis('off')
        axes[i, 1].axis('off')
        continue

plt.tight_layout()
plt.savefig(os.path.join(CONFIG["output_dir"], "inference_samples.png"))
plt.close()

# Guardar resultados
if results:
    results_df = pd.DataFrame(results)
    results_df.to_csv(os.path.join(CONFIG["output_dir"], "inference_samples_results.csv"), index=False)

    # Mostrar resumen
    correct_count = sum(1 for r in results if r['correct'])
    logger.info(f"Precisión en muestras de prueba: {correct_count}/{len(results)} ({100 * correct_count / len(results):.1f}%)")
else:
    logger.warning("No se pudieron procesar muestras para pruebas de inferencia")

In [None]:
# ============================== MEDICIÓN DE RENDIMIENTO ==============================

logger.info("Realizando benchmark de velocidad de inferencia")

# Asegurarse de que el modelo está cargado
try:
    # Intentar acceder al modelo
    model
    # Configurar para evaluación
    model.eval()
except NameError:
    # Si el modelo no está definido, cargar el mejor de fine-tuning
    logger.info("Cargando modelo desde checkpoint")
    model = TimesformerForVideoClassification.from_pretrained(
        CONFIG["pretrained_model"],
        num_frames=CONFIG["num_frames"],
        image_size=CONFIG["image_size"],
    )

    # Adaptarlo a nuestra tarea
    if model.classifier.out_features != CONFIG["num_classes"]:
        model.classifier = nn.Sequential(
            nn.Dropout(CONFIG["tl_dropout"]),
            nn.Linear(model.classifier.in_features, CONFIG["num_classes"])
        )

    best_ft_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt")
    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

# Asegurar que el modelo está en el dispositivo correcto
model.to(device)

# Crear datos de ejemplo
dummy_input = torch.randn(1, 3, CONFIG["num_frames"], CONFIG["image_size"], CONFIG["image_size"]).to(device)

# Calentar la GPU
logger.info("Calentando GPU...")
with torch.no_grad():
    for _ in range(10):
        _ = model(pixel_values=dummy_input)

# Medir tiempo de inferencia
num_runs = 100
logger.info(f"Midiendo tiempo para {num_runs} ejecuciones...")

start_time = time.time()
with torch.no_grad():
    for _ in range(num_runs):
        _ = model(pixel_values=dummy_input)
end_time = time.time()

# Calcular métricas
total_time = end_time - start_time
avg_time_per_inference = total_time / num_runs
fps = num_runs / total_time

logger.info(f"Tiempo total para {num_runs} ejecuciones: {total_time:.4f} segundos")
logger.info(f"Tiempo promedio por inferencia: {avg_time_per_inference*1000:.2f} ms")
logger.info(f"FPS (frames por segundo): {fps:.2f}")

# Guardar resultados
benchmark_results = {
    'total_time': total_time,
    'num_runs': num_runs,
    'avg_time_per_inference_ms': avg_time_per_inference * 1000,
    'fps': fps,
    'batch_size': 1,
    'num_frames': CONFIG["num_frames"],
    'image_size': CONFIG["image_size"],
    'device': str(device)
}

with open(os.path.join(CONFIG["output_dir"], "benchmark_results.json"), 'w') as f:
    json.dump(benchmark_results, f, indent=4)

logger.info("Benchmark completado y resultados guardados.")

In [None]:
# ============================== RESUMEN FINAL DE MÉTRICAS ==============================

logger.info("=== RESUMEN FINAL DE MÉTRICAS ===")

# Intentar cargar informes de evaluación
try:
    # Transfer Learning
    tl_report_path = os.path.join(CONFIG["output_dir"], "evaluation_report_transfer_learning.json")
    with open(tl_report_path, 'r') as f:
        tl_report = json.load(f)

    # Fine-Tuning
    ft_report_path = os.path.join(CONFIG["output_dir"], "evaluation_report_fine_tuning.json")
    with open(ft_report_path, 'r') as f:
        ft_report = json.load(f)

    # Test
    test_report_path = os.path.join(CONFIG["output_dir"], "evaluation_report_test.json")
    with open(test_report_path, 'r') as f:
        test_report = json.load(f)

    # Mostrar métricas
    logger.info("Métricas en Transfer Learning (validación):")
    logger.info(f"  - Accuracy: {tl_report['metrics']['accuracy']:.4f}")
    logger.info(f"  - Precision: {tl_report['metrics']['precision']:.4f}")
    logger.info(f"  - Recall (Sensibilidad): {tl_report['metrics']['recall']:.4f}")
    logger.info(f"  - Specificity: {tl_report['metrics']['specificity']:.4f}")
    logger.info(f"  - F1-Score: {tl_report['metrics']['f1_score']:.4f}")
    logger.info(f"  - ROC AUC: {tl_report['metrics']['roc_auc']:.4f}")

    logger.info("Métricas en Fine-Tuning (validación):")
    logger.info(f"  - Accuracy: {ft_report['metrics']['accuracy']:.4f}")
    logger.info(f"  - Precision: {ft_report['metrics']['precision']:.4f}")
    logger.info(f"  - Recall (Sensibilidad): {ft_report['metrics']['recall']:.4f}")
    logger.info(f"  - Specificity: {ft_report['metrics']['specificity']:.4f}")
    logger.info(f"  - F1-Score: {ft_report['metrics']['f1_score']:.4f}")
    logger.info(f"  - ROC AUC: {ft_report['metrics']['roc_auc']:.4f}")

    logger.info("Métricas en Test (final):")
    logger.info(f"  - Accuracy: {test_report['metrics']['accuracy']:.4f}")
    logger.info(f"  - Precision: {test_report['metrics']['precision']:.4f}")
    logger.info(f"  - Recall (Sensibilidad): {test_report['metrics']['recall']:.4f}")
    logger.info(f"  - Specificity: {test_report['metrics']['specificity']:.4f}")
    logger.info(f"  - F1-Score: {test_report['metrics']['f1_score']:.4f}")
    logger.info(f"  - ROC AUC: {test_report['metrics']['roc_auc']:.4f}")

    # Crear tabla comparativa
    metrics = ['accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'roc_auc']
    data = {
        'Métrica': metrics,
        'Transfer Learning': [tl_report['metrics'][m] for m in metrics],
        'Fine-Tuning': [ft_report['metrics'][m] for m in metrics],
        'Test': [test_report['metrics'][m] for m in metrics]
    }

    df = pd.DataFrame(data)

    # Formatear para mostrar resultados
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 120)
    pd.set_option('display.precision', 4)

    print("\n=== TABLA COMPARATIVA DE MÉTRICAS ===")
    print(df)

    # Guardar tabla
    df.to_csv(os.path.join(CONFIG["output_dir"], "metrics_comparison.csv"), index=False)

except Exception as e:
    logger.error(f"Error al cargar informes de evaluación: {str(e)}")
    logger.info("Asegúrate de que las fases de Transfer Learning, Fine-Tuning y Test ya se han ejecutado.")

# Mostrar información sobre el modelo final
try:
    # Cargar información del benchmark
    benchmark_path = os.path.join(CONFIG["output_dir"], "benchmark_results.json")
    with open(benchmark_path, 'r') as f:
        benchmark = json.load(f)

    logger.info("\nRendimiento del modelo:")
    logger.info(f"  - Tiempo por inferencia: {benchmark['avg_time_per_inference_ms']:.2f} ms")
    logger.info(f"  - Frames por segundo: {benchmark['fps']:.2f} FPS")

    # Cargar umbrales óptimos
    thresholds_path = os.path.join(CONFIG["output_dir"], "optimal_thresholds.json")
    with open(thresholds_path, 'r') as f:
        thresholds = json.load(f)

    logger.info("\nUmbrales óptimos:")
    logger.info(f"  - Umbral óptimo según F1: {thresholds['f1_optimal']:.4f}")
    logger.info(f"  - Umbral óptimo según ROC: {thresholds['roc_optimal']:.4f}")

except Exception as e:
    logger.error(f"Error al cargar información de rendimiento: {str(e)}")

# Mostrar rutas de los modelos exportados
try:
    model_paths = {
        'Modelo PyTorch': os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_final.pt"),
        'Modelo de Inferencia': os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_inference.pt"),
        'Modelo Hugging Face': os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_hf"),
        'Modelo ONNX': os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}.onnx"),
        'Script de Inferencia': os.path.join(CONFIG["output_dir"], "inference_example.py")
    }

    logger.info("\nModelos exportados:")
    for name, path in model_paths.items():
        exists = "✓" if os.path.exists(path) else "✗"
        logger.info(f"  - {name}: {path} {exists}")

except Exception as e:
    logger.error(f"Error al verificar rutas de modelos: {str(e)}")

logger.info("\n¡Entrenamiento y evaluación del modelo TimeSformer para detección de violencia completados!")