In [None]:
# Instalación de dependencias
!pip install transformers
!pip install datasets
!pip install decord
!pip install scikit-learn
!pip install matplotlib
!pip install seaborn
!pip install pandas
!pip install tqdm
!pip install scipy
!pip install torchmetrics
!pip install timm
!pip install av
!pip install einops
!pip install evaluate

Collecting decord
  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl.metadata (422 bytes)
Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m118.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: decord
Successfully installed decord-0.6.0
Collecting torchmetrics
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from 

In [None]:
# Importar bibliotecas necesarias
import os
import random
import math
import time
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torch.optim.lr_scheduler import OneCycleLR, CosineAnnealingLR
from torchvision import transforms
from torchmetrics.classification import BinaryAccuracy, BinaryPrecision, BinaryRecall, BinaryF1Score, BinarySpecificity
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, average_precision_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from transformers import TimesformerForVideoClassification, TimesformerConfig, AutoImageProcessor
from transformers import get_cosine_schedule_with_warmup
import decord
from decord import VideoReader, cpu
import av
import gc
import warnings
import random
import io
import zipfile
import logging
import json
from pathlib import Path


In [None]:
# Montar Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Configurar advertencias
warnings.filterwarnings('ignore')

# Configurar logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


# Verificar disponibilidad de GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

Usando dispositivo: cuda


In [None]:
# ============================== CONFIGURACIÓN DE HIPERPARÁMETROS ==============================

# Hiperparámetros generales
CONFIG = {
    # Rutas y nombres
    "dataset_path": "/content/drive/MyDrive/dataset_violencia",  # Ajustar según la ubicación real
    "output_dir": "/content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer",
    "model_name": "timesformer_violence_detector",

    # Parámetros del modelo
    "pretrained_model": "facebook/timesformer-base-finetuned-k400",
    "num_frames": 8,              # Número de frames a procesar
    "image_size": 224,             # Tamaño de los frames (224x224)
    "num_classes": 2,              # Violencia / No violencia

    # Parámetros de entrenamiento - Transfer Learning
    "tl_batch_size": 8,            # Tamaño del batch
    "tl_num_epochs": 10,           # Número de épocas
    "tl_learning_rate": 5e-5,      # Learning rate inicial
    "tl_weight_decay": 1e-4,       # Regularización L2
    "tl_dropout": 0.2,             # Tasa de dropout
    "tl_warmup_ratio": 0.1,        # Proporción de steps para warmup

    # Parámetros de entrenamiento - Fine-Tuning
    "ft_batch_size": 8,            # Tamaño del batch (más pequeño para fine-tuning)
    "ft_num_epochs": 5,            # Número de épocas adicionales
    "ft_learning_rate": 1e-5,      # Learning rate más bajo para fine-tuning
    "ft_weight_decay": 5e-5,       # Regularización L2 suave

    # Umbral de clasificación
    "threshold": 0.70,              # Umbral de decisión para la clasificación

    # Configuración de checkpoints
    "save_steps": 200,             # Guardar cada X pasos
    "save_total_limit": 3,         # Máximo número de checkpoints a mantener
    "save_best_only": True,        # Guardar solo el mejor modelo

    # Métricas y evaluación
    "eval_steps": 100,              # Evaluar cada X pasos
    "logging_steps": 50,           # Mostrar métricas cada X pasos

    # Otros parámetros
    "seed": 42,                    # Semilla para reproducibilidad
    "mixed_precision": True,       # Usar precisión mixta para acelerar entrenamiento
}

# Crear directorio de salida si no existe
os.makedirs(CONFIG["output_dir"], exist_ok=True)

# Guardar configuración
with open(os.path.join(CONFIG["output_dir"], "config.json"), 'w') as f:
    json.dump(CONFIG, f, indent=4)

# Configurar reproducibilidad
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(CONFIG["seed"])

In [None]:
# ============================== CLASES PARA EL DATASET Y PROCESAMIENTO ==============================

# Clase para procesar y cargar los videos
class ViolenceVideoDataset(Dataset):
    def __init__(self, root_dir, split='train', transform=None, num_frames=16, image_size=224, max_videos=None):
        """
        Dataset para clasificación de violencia en videos

        Args:
            root_dir: Directorio raíz del dataset
            split: 'train', 'val' o 'test'
            transform: Transformaciones a aplicar
            num_frames: Número de frames a extraer de cada video
            image_size: Tamaño de los frames
            max_videos: Limitar número de videos (para pruebas rápidas)
        """
        self.root_dir = root_dir
        self.split = split
        self.transform = transform
        self.num_frames = num_frames
        self.image_size = image_size

        self.processor = AutoImageProcessor.from_pretrained(CONFIG["pretrained_model"])

        # Obtener las rutas de videos y etiquetas
        violence_dir = os.path.join(root_dir, split, 'violence')
        no_violence_dir = os.path.join(root_dir, split, 'no_violence')

        # Verificar que los directorios existan
        if not os.path.exists(violence_dir) or not os.path.exists(no_violence_dir):
            raise ValueError(f"No se encontraron los directorios del dataset en {root_dir}/{split}")

        violence_videos = glob.glob(os.path.join(violence_dir, '*.mp4'))
        no_violence_videos = glob.glob(os.path.join(no_violence_dir, '*.mp4'))

        if len(violence_videos) == 0 or len(no_violence_videos) == 0:
            raise ValueError(f"No se encontraron videos en {violence_dir} o {no_violence_dir}")

        # Limitar videos si es necesario
        if max_videos is not None:
            max_per_class = max_videos // 2
            violence_videos = violence_videos[:max_per_class]
            no_violence_videos = no_violence_videos[:max_per_class]

        self.video_paths = violence_videos + no_violence_videos
        self.labels = [1] * len(violence_videos) + [0] * len(no_violence_videos)

        # Mezclar los datos manteniendo correspondencia entre paths y labels
        combined = list(zip(self.video_paths, self.labels))
        random.shuffle(combined)
        self.video_paths, self.labels = zip(*combined)

        # Convertir a lista
        self.video_paths = list(self.video_paths)
        self.labels = list(self.labels)

        print(f"Cargados {len(self.video_paths)} videos para split '{split}'")
        print(f"Violencia: {len(violence_videos)}, No Violencia: {len(no_violence_videos)}")

    def __len__(self):
        return len(self.video_paths)

    def sample_frames_from_video(self, video_path):
        """Extrae frames uniformemente espaciados del video"""
        try:
            # Usar decord para cargar el video eficientemente
            video_reader = VideoReader(video_path, ctx=cpu(0))
            total_frames = len(video_reader)

            if total_frames == 0:
                raise ValueError(f"Video vacío o corrupto: {video_path}")

            # Seleccionar frames uniformemente
            indices = np.linspace(0, total_frames - 1, self.num_frames, dtype=int)
            frames = video_reader.get_batch(indices).asnumpy()  # (num_frames, H, W, C)

            # Aplicar resize y normalización
            processed_frames = []
            for frame in frames:
                # Redimensionar
                frame = transforms.functional.resize(
                    transforms.functional.to_tensor(frame),
                    (self.image_size, self.image_size)
                )
                processed_frames.append(frame)

            # Apilar frames
            frames_tensor = torch.stack(processed_frames)  # (T, C, H, W)

            # Mover dimensiones para coincidir con lo que espera el modelo (B, C, T, H, W)
            frames_tensor = frames_tensor.permute(1, 0, 2, 3).unsqueeze(0)

            return frames_tensor

        except Exception as e:
            logger.error(f"Error al procesar video {video_path}: {str(e)}")
            # Retornar un tensor de ceros en caso de error
            return torch.zeros((1, 3, self.num_frames, self.image_size, self.image_size))

    def __getitem__(self, idx):
        """Obtiene un item por su índice"""
        video_path = self.video_paths[idx]
        label = self.labels[idx]

        # Extraer frames
        frames = self.sample_frames_from_video(video_path)

        # Preprocesar frames usando el procesador de TimeSformer
        try:
            frames_list = list(frames.squeeze(0).permute(1, 0, 2, 3))  # Convertir a lista de tensores (T, C, H, W)
            # inputs = self.processor(frames_list, return_tensors="pt")
            inputs = self.processor(
                frames_list,
                return_tensors="pt",
                do_rescale=False  # Añadir esta línea para evitar el re-escalado
            )
            pixel_values = inputs['pixel_values'].squeeze(0)  # Eliminar dim de batch
        except Exception as e:
            logger.error(f"Error al procesar frames del video {video_path}: {str(e)}")
            # Crear input vacío de tamaño correcto en caso de error
            pixel_values = torch.zeros((3, self.num_frames, self.image_size, self.image_size))

        return {
            'pixel_values': pixel_values,
            'labels': torch.tensor(label, dtype=torch.long),
            'video_path': video_path
        }

# ============================== FUNCIONES DE ENTRENAMIENTO Y EVALUACIÓN ==============================

def train_epoch(model, dataloader, optimizer, scheduler, criterion, device, epoch, config):
    """Entrena el modelo durante una época completa"""
    model.train()
    epoch_loss = 0
    epoch_acc = 0
    epoch_precision = 0
    epoch_recall = 0
    epoch_f1 = 0

    # Métricas
    accuracy_metric = BinaryAccuracy().to(device)
    precision_metric = BinaryPrecision().to(device)
    recall_metric = BinaryRecall().to(device)
    f1_metric = BinaryF1Score().to(device)

    progress_bar = tqdm(enumerate(dataloader), total=len(dataloader), desc=f"Época {epoch+1}")

    for step, batch in progress_bar:
        try:
            # Inicializar con valores predeterminados en caso de error
            loss_value = 0.0
            accuracy = precision = recall = f1 = 0.0

            # Mover datos al dispositivo
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['labels'].to(device)

            # Forward pass
            outputs = model(pixel_values=pixel_values, labels=labels)
            loss = outputs.loss
            loss_value = loss.item()  # Guardar valor inmediatamente

            # Backward pass
            optimizer.zero_grad()
            loss.backward()

            # Clip gradient norm para estabilidad
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()
            if scheduler is not None:
                scheduler.step()

            # Calcular métricas
            logits = outputs.logits
            preds = torch.sigmoid(logits[:, 1])  # Solo necesitamos la probabilidad de 'violencia'

            accuracy = accuracy_metric(preds, labels)
            precision = precision_metric(preds, labels)
            recall = recall_metric(preds, labels)
            f1 = f1_metric(preds, labels)

            # Acumular métricas
            epoch_loss += loss.item()
            epoch_acc += accuracy.item()
            epoch_precision += precision.item()
            epoch_recall += recall.item()
            epoch_f1 += f1.item()

            # Actualizar progreso
            progress_bar.set_postfix({
                'loss': loss.item(),
                'acc': accuracy.item(),
                'prec': precision.item(),
                'rec': recall.item(),
                'f1': f1.item()
            })

            # Liberar memoria explícitamente
            del pixel_values, labels, outputs, loss, logits, preds
            torch.cuda.empty_cache()

            # Guardar checkpoint cada ciertos pasos
            if (step + 1) % config["save_steps"] == 0:
                checkpoint_path = os.path.join(
                    config["output_dir"],
                    f"checkpoint_epoch{epoch+1}_step{step+1}.pt"
                )
                torch.save({
                    'epoch': epoch,
                    'step': step,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
                    'loss': loss.item(),
                }, checkpoint_path)
                logger.info(f"Guardado checkpoint en {checkpoint_path}")

        except Exception as e:
            logger.error(f"Error en paso {step}, época {epoch+1}: {str(e)}")
            # Intentar liberar memoria y continuar
            torch.cuda.empty_cache()
            continue

    # Calcular métricas promedio
    num_batches = len(dataloader)
    epoch_loss /= num_batches
    epoch_acc /= num_batches
    epoch_precision /= num_batches
    epoch_recall /= num_batches
    epoch_f1 /= num_batches

    return {
        'loss': epoch_loss,
        'accuracy': epoch_acc,
        'precision': epoch_precision,
        'recall': epoch_recall,
        'f1': epoch_f1
    }

def evaluate(model, dataloader, criterion, device, config):
    """Evalúa el modelo en un conjunto de datos"""
    model.eval()
    all_preds = []
    all_labels = []
    val_loss = 0

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluando"):
            try:
                # Mover datos al dispositivo
                pixel_values = batch['pixel_values'].to(device)
                labels = batch['labels'].to(device)

                # Forward pass
                outputs = model(pixel_values=pixel_values, labels=labels)
                loss = outputs.loss
                val_loss += loss.item()

                # Obtener predicciones
                logits = outputs.logits
                preds = torch.sigmoid(logits[:, 1])  # Solo la probabilidad de 'violencia'

                # Guardar predicciones y etiquetas
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                # Liberar memoria
                del pixel_values, labels, outputs, loss, logits, preds
                torch.cuda.empty_cache()

            except Exception as e:
                logger.error(f"Error al evaluar batch: {str(e)}")
                continue

    # Convertir a arrays numpy
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    if len(all_preds) == 0 or len(all_labels) == 0:
        logger.error("No se pudieron obtener predicciones o etiquetas durante la evaluación")
        return {
            'loss': float('inf'),
            'accuracy': 0,
            'precision': 0,
            'recall': 0,
            'specificity': 0,
            'f1': 0,
            'roc_auc': 0,
            'confusion_matrix': np.zeros((2, 2)),
            'fpr': np.array([0, 1]),
            'tpr': np.array([0, 0]),
            'predictions': np.array([]),
            'labels': np.array([])
        }

    # Calcular métricas
    binary_preds = (all_preds >= config["threshold"]).astype(int)

    accuracy = accuracy_score(all_labels, binary_preds)
    precision = precision_score(all_labels, binary_preds, zero_division=0)
    recall = recall_score(all_labels, binary_preds, zero_division=0)
    f1 = f1_score(all_labels, binary_preds, zero_division=0)

    # Calcular especificidad (TN / (TN + FP))
    tn, fp, fn, tp = confusion_matrix(all_labels, binary_preds, labels=[0, 1]).ravel()
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    # Calcular métricas de curva ROC
    try:
        fpr, tpr, _ = roc_curve(all_labels, all_preds)
        roc_auc = auc(fpr, tpr)
    except Exception as e:
        logger.error(f"Error al calcular curva ROC: {str(e)}")
        fpr, tpr = np.array([0, 1]), np.array([0, 0])
        roc_auc = 0

    # Matriz de confusión
    cm = confusion_matrix(all_labels, binary_preds, labels=[0, 1])

    # Pérdida promedio
    val_loss /= len(dataloader)

    # Crear informe de evaluación
    eval_results = {
        'loss': val_loss,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,  # Sensibilidad
        'specificity': specificity,
        'f1': f1,
        'roc_auc': roc_auc,
        'confusion_matrix': cm,
        'fpr': fpr,
        'tpr': tpr,
        'predictions': all_preds,
        'labels': all_labels
    }

    return eval_results

def plot_metrics(train_metrics, val_metrics, config):
    """Genera gráficos de métricas de entrenamiento"""
    metrics_to_plot = ['loss', 'accuracy', 'precision', 'recall', 'f1']
    epochs = range(1, len(train_metrics['loss']) + 1)

    plt.figure(figsize=(20, 15))

    for i, metric in enumerate(metrics_to_plot):
        plt.subplot(3, 2, i+1)
        plt.plot(epochs, train_metrics[metric], 'b-', label=f'Training {metric}')
        plt.plot(epochs, val_metrics[metric], 'r-', label=f'Validation {metric}')
        plt.title(f'{metric.capitalize()} vs. Epochs')
        plt.xlabel('Epochs')
        plt.ylabel(metric.capitalize())
        plt.legend()
        plt.grid(True)

    # Guardar figura
    plt.tight_layout()
    plt.savefig(os.path.join(config["output_dir"], "training_metrics.png"))
    plt.close()

def plot_confusion_matrix(cm, config, phase='transfer_learning'):
    """Visualiza la matriz de confusión"""
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['No Violencia', 'Violencia'],
                yticklabels=['No Violencia', 'Violencia'])
    plt.xlabel('Predicción')
    plt.ylabel('Real')
    plt.title('Matriz de Confusión')

    # Guardar figura
    plt.tight_layout()
    plt.savefig(os.path.join(config["output_dir"], f"confusion_matrix_{phase}.png"))
    plt.close()

def plot_roc_curve(fpr, tpr, roc_auc, config, phase='transfer_learning'):
    """Visualiza la curva ROC"""
    plt.figure(figsize=(10, 8))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.legend(loc="lower right")

    # Guardar figura
    plt.tight_layout()
    plt.savefig(os.path.join(config["output_dir"], f"roc_curve_{phase}.png"))
    plt.close()

def save_evaluation_report(eval_results, config, phase='transfer_learning'):
    """Guarda un informe detallado de la evaluación"""
    report = {
        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'phase': phase,
        'metrics': {
            'loss': float(eval_results['loss']),
            'accuracy': float(eval_results['accuracy']),
            'precision': float(eval_results['precision']),
            'recall': float(eval_results['recall']),
            'specificity': float(eval_results['specificity']),
            'f1_score': float(eval_results['f1']),
            'roc_auc': float(eval_results['roc_auc']),
        },
        'confusion_matrix': eval_results['confusion_matrix'].tolist(),
    }

    # Guardar informe en formato JSON
    with open(os.path.join(config["output_dir"], f"evaluation_report_{phase}.json"), 'w') as f:
        json.dump(report, f, indent=4)

    # También guardar en formato de texto para mejor legibilidad
    with open(os.path.join(config["output_dir"], f"evaluation_report_{phase}.txt"), 'w') as f:
        f.write(f"Evaluación del Modelo - Fase: {phase}\n")
        f.write(f"Fecha: {report['timestamp']}\n")
        f.write("\n=== Métricas ===\n")
        f.write(f"Loss: {report['metrics']['loss']:.4f}\n")
        f.write(f"Accuracy: {report['metrics']['accuracy']:.4f}\n")
        f.write(f"Precision: {report['metrics']['precision']:.4f}\n")
        f.write(f"Recall (Sensibilidad): {report['metrics']['recall']:.4f}\n")
        f.write(f"Specificity: {report['metrics']['specificity']:.4f}\n")
        f.write(f"F1-Score: {report['metrics']['f1_score']:.4f}\n")
        f.write(f"ROC AUC: {report['metrics']['roc_auc']:.4f}\n")
        f.write("\n=== Matriz de Confusión ===\n")
        f.write("                Pred: No Violencia  Pred: Violencia\n")
        f.write(f"Real: No Violencia    {eval_results['confusion_matrix'][0][0]}               {eval_results['confusion_matrix'][0][1]}\n")
        f.write(f"Real: Violencia       {eval_results['confusion_matrix'][1][0]}               {eval_results['confusion_matrix'][1][1]}\n")

# **ENTRENAMIENTO CON TRANSFER LEARNING**

In [None]:
# ============================== ENTRENAMIENTO CON TRANSFER LEARNING ==============================

logger.info("Iniciando fase de Transfer Learning")
print("Iniciando fase de Transfer Learning")

# 1. Cargar modelo pre-entrenado
model = TimesformerForVideoClassification.from_pretrained(
    CONFIG["pretrained_model"],
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"],
    num_labels=CONFIG["num_classes"],  # Añadir esto para configurar 2 clases desde el inicio
    ignore_mismatched_sizes=True
)

# 2. Asegurarnos de que la clasificación final tiene el número correcto de salidas
if hasattr(model, 'classifier'):
    if hasattr(model.classifier, 'out_features') and model.classifier.out_features != CONFIG["num_classes"]:
        # Guardar dimensión de entrada
        in_features = model.classifier.in_features

        # Reemplazar completamente el clasificador
        model.classifier = nn.Linear(in_features, CONFIG["num_classes"])

        logger.info(f"Reemplazada capa de clasificación: {in_features} -> {CONFIG['num_classes']}")
    elif isinstance(model.classifier, nn.Sequential):
        # Si ya es una secuencia, asegurarnos que la última capa tenga la salida correcta
        last_layer = model.classifier[-1]
        if hasattr(last_layer, 'out_features') and last_layer.out_features != CONFIG["num_classes"]:
            in_features = last_layer.in_features
            model.classifier[-1] = nn.Linear(in_features, CONFIG["num_classes"])
            logger.info(f"Reemplazada última capa de clasificación: {in_features} -> {CONFIG['num_classes']}")


# 3. Congelar los parámetros del modelo base (excepto los de la capa de clasificación)
for name, param in model.named_parameters():
    if 'classifier' not in name:  # Congelar todos los parámetros excepto los del clasificador
        param.requires_grad = False

# Verificar parámetros entrenables vs congelados
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
logger.info(f"Parámetros entrenables: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")
print(f"Parámetros entrenables: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")

# Mover modelo a GPU
model.to(device)

# 4. Preparar datasets y dataloaders
train_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='train',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

val_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='val',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=CONFIG["tl_batch_size"],
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

val_dataloader = DataLoader(
    val_dataset,
    batch_size=CONFIG["tl_batch_size"],
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

# 5. Configurar optimizador y scheduler
optimizer = optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=CONFIG["tl_learning_rate"],
    weight_decay=CONFIG["tl_weight_decay"]
)

# Calcular pasos totales para schedulers
num_training_steps = len(train_dataloader) * CONFIG["tl_num_epochs"]
num_warmup_steps = int(num_training_steps * CONFIG["tl_warmup_ratio"])

scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps
)

# 6. Criterio de pérdida (ya incluido en el modelo)
criterion = nn.CrossEntropyLoss()

# 7. Inicializar tracking de métricas
best_val_f1 = 0.0
train_metrics = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
val_metrics = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

# 8. Entrenamiento por épocas
for epoch in range(CONFIG["tl_num_epochs"]):
    logger.info(f"Iniciando época {epoch+1}/{CONFIG['tl_num_epochs']}")
    print(f"Iniciando época {epoch+1}/{CONFIG['tl_num_epochs']}")

    # Entrenamiento
    train_results = train_epoch(
        model=model,
        dataloader=train_dataloader,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=criterion,
        device=device,
        epoch=epoch,
        config=CONFIG
    )

    # Evaluación
    eval_results = evaluate(
        model=model,
        dataloader=val_dataloader,
        criterion=criterion,
        device=device,
        config=CONFIG
    )

    # Registrar métricas
    for metric in ['loss', 'accuracy', 'precision', 'recall', 'f1']:
        train_metrics[metric].append(train_results[metric])
        val_metrics[metric].append(eval_results[metric])

    # Mostrar resultados
    logger.info(f"Epoch {epoch+1}/{CONFIG['tl_num_epochs']} - "
               f"Train Loss: {train_results['loss']:.4f}, "
               f"Val Loss: {eval_results['loss']:.4f}, "
               f"Train Acc: {train_results['accuracy']:.4f}, "
               f"Val Acc: {eval_results['accuracy']:.4f}, "
               f"Val F1: {eval_results['f1']:.4f}")
    print(f"Epoch {epoch+1}/{CONFIG['tl_num_epochs']} - "
          f"Train Loss: {train_results['loss']:.4f}, "
          f"Val Loss: {eval_results['loss']:.4f}, "
          f"Train Acc: {train_results['accuracy']:.4f}, "
          f"Val Acc: {eval_results['accuracy']:.4f}, "
          f"Val F1: {eval_results['f1']:.4f}")

    # Guardar mejor modelo
    if eval_results['f1'] > best_val_f1:
        best_val_f1 = eval_results['f1']

        # Guardar modelo
        model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_tl.pt")
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'val_f1': best_val_f1,
            'config': CONFIG,
        }, model_path)

        logger.info(f"Guardado mejor modelo con F1: {best_val_f1:.4f} en {model_path}")
        print(f"Guardado mejor modelo con F1: {best_val_f1:.4f} en {model_path}")

    # Guardar checkpoint al final de cada época
    checkpoint_path = os.path.join(CONFIG["output_dir"], f"checkpoint_epoch{epoch+1}.pt")
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'train_metrics': train_metrics,
        'val_metrics': val_metrics,
    }, checkpoint_path)

    logger.info(f"Guardado checkpoint de época {epoch+1} en {checkpoint_path}")
    print(f"Guardado checkpoint de época {epoch+1} en {checkpoint_path}")


Iniciando fase de Transfer Learning


config.json:   0%|          | 0.00/22.7k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/486M [00:00<?, ?B/s]

Some weights of TimesformerForVideoClassification were not initialized from the model checkpoint at facebook/timesformer-base-finetuned-k400 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([400, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([400]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Parámetros entrenables: 1,538 / 121,260,290 (0.00%)


preprocessor_config.json:   0%|          | 0.00/412 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


model.safetensors:   0%|          | 0.00/486M [00:00<?, ?B/s]

Cargados 8000 videos para split 'train'
Violencia: 4000, No Violencia: 4000
Cargados 1500 videos para split 'val'
Violencia: 750, No Violencia: 750
Iniciando época 1/10


Época 1:  20%|█▉        | 199/1000 [09:04<33:59,  2.55s/it, loss=0.712, acc=0.375, prec=0.2, rec=0.5, f1=0.286]ERROR:__main__:Error en paso 199, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  40%|███▉      | 399/1000 [17:14<31:30,  3.14s/it, loss=0.926, acc=0.5, prec=0.667, rec=0.4, f1=0.5] ERROR:__main__:Error en paso 399, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  60%|█████▉    | 599/1000 [25:49<15:07,  2.26s/it, loss=0.621, acc=0.5, prec=0.667, rec=0.4, f1=0.5]     ERROR:__main__:Error en paso 599, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  80%|███████▉  | 799/1000 [33:44<06:02,  1.80s/it, loss=0.431, acc=0.5, prec=1, rec=0.333, f1=0.5]  ERROR:__main__:Error en paso 799, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1: 100%|█████████▉| 999/1000 [41:38<00:02,  2.07s/it, loss=0.249, acc=0.87

Epoch 1/10 - Train Loss: 0.6173, Val Loss: 0.3113, Train Acc: 0.5199, Val Acc: 0.7760, Val F1: 0.7214
Guardado mejor modelo con F1: 0.7214 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 1 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_epoch1.pt
Iniciando época 2/10


Época 2:  20%|█▉        | 199/1000 [01:50<07:19,  1.82it/s, loss=0.37, acc=0.75, prec=1, rec=0.667, f1=0.8]ERROR:__main__:Error en paso 199, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  40%|███▉      | 399/1000 [03:41<05:31,  1.82it/s, loss=0.15, acc=0.875, prec=1, rec=0.667, f1=0.8]     ERROR:__main__:Error en paso 399, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  60%|█████▉    | 599/1000 [05:30<03:40,  1.82it/s, loss=0.368, acc=0.75, prec=0.75, rec=0.75, f1=0.75]ERROR:__main__:Error en paso 599, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  80%|███████▉  | 799/1000 [07:20<01:50,  1.82it/s, loss=0.261, acc=0.875, prec=0.8, rec=1, f1=0.889]ERROR:__main__:Error en paso 799, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2: 100%|█████████▉| 999/1000 [09:10<00:00,  1.82it/s, loss=0.121, acc=0.875, p

Epoch 2/10 - Train Loss: 0.2418, Val Loss: 0.1893, Train Acc: 0.8856, Val Acc: 0.8973, Val F1: 0.8882
Guardado mejor modelo con F1: 0.8882 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 2 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_epoch2.pt
Iniciando época 3/10


Época 3:  20%|█▉        | 199/1000 [01:52<07:19,  1.82it/s, loss=0.255, acc=0.875, prec=0.857, rec=1, f1=0.923]ERROR:__main__:Error en paso 199, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  40%|███▉      | 399/1000 [03:42<05:29,  1.82it/s, loss=0.13, acc=0.875, prec=1, rec=0.833, f1=0.909]ERROR:__main__:Error en paso 399, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  60%|█████▉    | 599/1000 [05:32<03:40,  1.82it/s, loss=0.142, acc=0.875, prec=1, rec=0.8, f1=0.889] ERROR:__main__:Error en paso 599, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  80%|███████▉  | 799/1000 [07:22<01:50,  1.82it/s, loss=0.241, acc=0.875, prec=0.667, rec=1, f1=0.8]ERROR:__main__:Error en paso 799, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3: 100%|█████████▉| 999/1000 [09:12<00:00,  1.82it/s, loss=0.238, acc=1, prec=

Epoch 3/10 - Train Loss: 0.1828, Val Loss: 0.1600, Train Acc: 0.9175, Val Acc: 0.9167, Val F1: 0.9113
Guardado mejor modelo con F1: 0.9113 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 3 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_epoch3.pt
Iniciando época 4/10


Época 4:  20%|█▉        | 199/1000 [01:56<07:18,  1.83it/s, loss=0.504, acc=0.875, prec=0.833, rec=1, f1=0.909]  ERROR:__main__:Error en paso 199, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  40%|███▉      | 399/1000 [03:46<05:30,  1.82it/s, loss=0.00767, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  60%|█████▉    | 599/1000 [05:36<03:40,  1.82it/s, loss=0.0319, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 599, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  80%|███████▉  | 799/1000 [07:26<01:50,  1.82it/s, loss=0.0214, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 799, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4: 100%|█████████▉| 999/1000 [09:16<00:00,  1.82it/s, loss=0.0139, acc=1, prec=1, rec=1, f1=1]         E

Epoch 4/10 - Train Loss: 0.1619, Val Loss: 0.1470, Train Acc: 0.9274, Val Acc: 0.9227, Val F1: 0.9180
Guardado mejor modelo con F1: 0.9180 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 4 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_epoch4.pt
Iniciando época 5/10


Época 5:  20%|█▉        | 199/1000 [01:54<07:18,  1.83it/s, loss=0.0414, acc=1, prec=1, rec=1, f1=1]         ERROR:__main__:Error en paso 199, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5:  40%|███▉      | 399/1000 [03:44<05:30,  1.82it/s, loss=0.0261, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5:  60%|█████▉    | 599/1000 [05:34<03:40,  1.82it/s, loss=0.497, acc=0.875, prec=1, rec=0.8, f1=0.889]ERROR:__main__:Error en paso 599, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5:  80%|███████▉  | 799/1000 [07:24<01:50,  1.81it/s, loss=0.665, acc=0.875, prec=0.75, rec=1, f1=0.857]ERROR:__main__:Error en paso 799, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5: 100%|█████████▉| 999/1000 [09:14<00:00,  1.82it/s, loss=0.0667, acc=1, prec=1, rec=1, f

Epoch 5/10 - Train Loss: 0.1506, Val Loss: 0.1389, Train Acc: 0.9346, Val Acc: 0.9253, Val F1: 0.9211
Guardado mejor modelo con F1: 0.9211 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 5 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_epoch5.pt
Iniciando época 6/10


Época 6:  20%|█▉        | 199/1000 [01:54<07:20,  1.82it/s, loss=0.0418, acc=1, prec=1, rec=1, f1=1]          ERROR:__main__:Error en paso 199, época 6: cannot access local variable 'loss' where it is not associated with a value
Época 6:  40%|███▉      | 399/1000 [03:44<05:29,  1.82it/s, loss=0.101, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 399, época 6: cannot access local variable 'loss' where it is not associated with a value
Época 6:  60%|█████▉    | 599/1000 [05:34<03:40,  1.81it/s, loss=0.109, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 599, época 6: cannot access local variable 'loss' where it is not associated with a value
Época 6:  80%|███████▉  | 799/1000 [07:24<01:50,  1.82it/s, loss=0.247, acc=0.75, prec=1, rec=0.714, f1=0.833]ERROR:__main__:Error en paso 799, época 6: cannot access local variable 'loss' where it is not associated with a value
Época 6: 100%|█████████▉| 999/1000 [09:14<00:00,  1.82it/s, loss=0.0894, acc=0.875, prec=1, rec=0.833, f1

Epoch 6/10 - Train Loss: 0.1434, Val Loss: 0.1352, Train Acc: 0.9380, Val Acc: 0.9287, Val F1: 0.9248
Guardado mejor modelo con F1: 0.9248 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 6 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_epoch6.pt
Iniciando época 7/10


Época 7:  20%|█▉        | 199/1000 [01:52<07:20,  1.82it/s, loss=0.248, acc=0.875, prec=0.75, rec=1, f1=0.857]ERROR:__main__:Error en paso 199, época 7: cannot access local variable 'loss' where it is not associated with a value
Época 7:  40%|███▉      | 399/1000 [03:42<05:30,  1.82it/s, loss=0.0938, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 7: cannot access local variable 'loss' where it is not associated with a value
Época 7:  60%|█████▉    | 599/1000 [05:32<03:40,  1.82it/s, loss=0.0319, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 599, época 7: cannot access local variable 'loss' where it is not associated with a value
Época 7:  80%|███████▉  | 799/1000 [07:22<01:50,  1.82it/s, loss=0.0173, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 799, época 7: cannot access local variable 'loss' where it is not associated with a value
Época 7: 100%|█████████▉| 999/1000 [09:12<00:00,  1.82it/s, loss=0.0144, acc=1, prec=1, rec=1, f1=1]ERROR:__main__

Epoch 7/10 - Train Loss: 0.1391, Val Loss: 0.1322, Train Acc: 0.9409, Val Acc: 0.9287, Val F1: 0.9249
Guardado mejor modelo con F1: 0.9249 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 7 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_epoch7.pt
Iniciando época 8/10


Época 8:  20%|█▉        | 199/1000 [01:55<07:19,  1.82it/s, loss=0.0282, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 199, época 8: cannot access local variable 'loss' where it is not associated with a value
Época 8:  40%|███▉      | 399/1000 [03:45<05:30,  1.82it/s, loss=0.0452, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 8: cannot access local variable 'loss' where it is not associated with a value
Época 8:  60%|█████▉    | 599/1000 [05:35<03:41,  1.81it/s, loss=0.0355, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 599, época 8: cannot access local variable 'loss' where it is not associated with a value
Época 8:  80%|███████▉  | 799/1000 [07:25<01:50,  1.82it/s, loss=0.174, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 799, época 8: cannot access local variable 'loss' where it is not associated with a value
Época 8: 100%|█████████▉| 999/1000 [09:15<00:00,  1.82it/s, loss=0.234, acc=0.875, prec=1, rec=0.875, f1=0.933]ERROR:__main_

Epoch 8/10 - Train Loss: 0.1365, Val Loss: 0.1309, Train Acc: 0.9416, Val Acc: 0.9293, Val F1: 0.9257
Guardado mejor modelo con F1: 0.9257 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 8 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_epoch8.pt
Iniciando época 9/10


Época 9:  20%|█▉        | 199/1000 [01:53<07:19,  1.82it/s, loss=0.0313, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 199, época 9: cannot access local variable 'loss' where it is not associated with a value
Época 9:  40%|███▉      | 399/1000 [03:43<05:31,  1.81it/s, loss=0.0185, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 9: cannot access local variable 'loss' where it is not associated with a value
Época 9:  60%|█████▉    | 599/1000 [05:33<03:41,  1.81it/s, loss=0.365, acc=0.875, prec=1, rec=0.833, f1=0.909]ERROR:__main__:Error en paso 599, época 9: cannot access local variable 'loss' where it is not associated with a value
Época 9:  80%|███████▉  | 799/1000 [07:23<01:50,  1.82it/s, loss=0.044, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 799, época 9: cannot access local variable 'loss' where it is not associated with a value
Época 9: 100%|█████████▉| 999/1000 [09:13<00:00,  1.82it/s, loss=0.0322, acc=1, prec=1, rec=1, f1=1]         ERRO

Epoch 9/10 - Train Loss: 0.1353, Val Loss: 0.1304, Train Acc: 0.9425, Val Acc: 0.9313, Val F1: 0.9279
Guardado mejor modelo con F1: 0.9279 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_tl.pt
Guardado checkpoint de época 9 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_epoch9.pt
Iniciando época 10/10


Época 10:  20%|█▉        | 199/1000 [01:53<07:20,  1.82it/s, loss=0.298, acc=0.875, prec=0.8, rec=1, f1=0.889] ERROR:__main__:Error en paso 199, época 10: cannot access local variable 'loss' where it is not associated with a value
Época 10:  40%|███▉      | 399/1000 [03:43<05:30,  1.82it/s, loss=0.275, acc=0.875, prec=1, rec=0.8, f1=0.889]ERROR:__main__:Error en paso 399, época 10: cannot access local variable 'loss' where it is not associated with a value
Época 10:  60%|█████▉    | 599/1000 [05:33<03:40,  1.82it/s, loss=0.238, acc=0.875, prec=1, rec=0.667, f1=0.8]ERROR:__main__:Error en paso 599, época 10: cannot access local variable 'loss' where it is not associated with a value
Época 10:  80%|███████▉  | 799/1000 [07:23<01:50,  1.82it/s, loss=0.183, acc=0.75, prec=1, rec=0.333, f1=0.5]  ERROR:__main__:Error en paso 799, época 10: cannot access local variable 'loss' where it is not associated with a value
Época 10: 100%|█████████▉| 999/1000 [09:13<00:00,  1.82it/s, loss=0.117, acc=1

Epoch 10/10 - Train Loss: 0.1348, Val Loss: 0.1304, Train Acc: 0.9430, Val Acc: 0.9313, Val F1: 0.9279
Guardado checkpoint de época 10 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_epoch10.pt


In [None]:
# 9. Visualizar y guardar métricas
plot_metrics(train_metrics, val_metrics, CONFIG)

# 10. Evaluación final del mejor modelo
# Cargar el mejor modelo
best_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_tl.pt")
checkpoint = torch.load(best_model_path)
model.load_state_dict(checkpoint['model_state_dict'])

logger.info(f"Evaluando mejor modelo de Transfer Learning (F1: {checkpoint['val_f1']:.4f})")
print(f"Evaluando mejor modelo de Transfer Learning (F1: {checkpoint['val_f1']:.4f})")

final_eval_results = evaluate(
    model=model,
    dataloader=val_dataloader,
    criterion=criterion,
    device=device,
    config=CONFIG
)

# Visualizar matriz de confusión
plot_confusion_matrix(final_eval_results['confusion_matrix'], CONFIG, phase='transfer_learning')

# Visualizar curva ROC
plot_roc_curve(
    final_eval_results['fpr'],
    final_eval_results['tpr'],
    final_eval_results['roc_auc'],
    CONFIG,
    phase='transfer_learning'
)

# Guardar informe detallado
save_evaluation_report(final_eval_results, CONFIG, phase='transfer_learning')

logger.info("Completada fase de Transfer Learning")
print("Completada fase de Transfer Learning")

# Guardar resultados para usarlos en etapas posteriores
tl_results = final_eval_results

Evaluando mejor modelo de Transfer Learning (F1: 0.9279)


Evaluando: 100%|██████████| 188/188 [01:42<00:00,  1.83it/s]


Completada fase de Transfer Learning


# ENTRENAMIENTO CON FINE TUNING

In [None]:
# ============================== ENTRENAMIENTO FINE TUNING ==============================

logger.info("Iniciando fase de Fine-Tuning")
print("Iniciando fase de Fine-Tuning")

# Cargar el mejor modelo de Transfer Learning
best_tl_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_tl.pt")
checkpoint = torch.load(best_tl_model_path)

# Comprobar si ya tenemos el modelo cargado (de la celda anterior) o necesitamos cargarlo
try:
    # Intentar acceder al modelo, si no está definido, lanzará una excepción
    model
    logger.info("Usando modelo ya cargado de celda anterior")
    print("Usando modelo ya cargado de celda anterior")
    # Cargar estado del modelo desde checkpoint
    model.load_state_dict(checkpoint['model_state_dict'])
except NameError:
    # Si el modelo no está definido, crear uno nuevo y cargarlo
    logger.info("Creando y cargando modelo desde checkpoint")
    print("Creando y cargando modelo desde checkpoint")
    # model = TimesformerForVideoClassification.from_pretrained(
    #     CONFIG["pretrained_model"],
    #     num_frames=CONFIG["num_frames"],
    #     image_size=CONFIG["image_size"],
    #     num_labels=CONFIG["num_classes"],
    #     # ignore_mismatched_sizes=True
    # )

    # Cargar la configuración del modelo pre-entrenado y modificarla para 2 clases
    config = TimesformerConfig.from_pretrained(
        CONFIG["pretrained_model"],
        num_frames=CONFIG["num_frames"],
        image_size=CONFIG["image_size"],
        num_labels=CONFIG["num_classes"], # Asegura que la config especifique 2 clases
    )

    # Crear una instancia del modelo *con la configuración de 2 clases*
    model = TimesformerForVideoClassification(config)

    # Cargar estado del modelo desde checkpoint
    # model.load_state_dict(checkpoint['model_state_dict'])

    # Ahora cargar el state_dict del checkpoint.
    # Como la arquitectura ya coincide (capa de 2 clases), strict=True debería funcionar.
    # El checkpoint contiene el estado COMPLETO del modelo después del TL.
    model.load_state_dict(checkpoint['model_state_dict'], strict=True)

# 1. Descongelar todos los parámetros del modelo
for param in model.parameters():
    param.requires_grad = True

# Verificar parámetros entrenables
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
logger.info(f"Parámetros entrenables: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")
print(f"Parámetros entrenables: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")

# Asegurar que el modelo está en el dispositivo correcto
model.to(device)

# 2. Preparar datasets y dataloaders con augmentación adicional para fine-tuning
# Definir transformaciones para data augmentation más agresivas
class VideoAugmentor:
    """Aplica augmentación a nivel de frame para videos"""
    def __init__(self, strength=0.3):
        self.transform = transforms.Compose([
            transforms.ColorJitter(brightness=0.2*strength,
                                  contrast=0.2*strength,
                                  saturation=0.2*strength,
                                  hue=0.1*strength),
            transforms.RandomAffine(degrees=5*strength,
                                   translate=(0.1*strength, 0.1*strength),
                                   scale=(1-0.1*strength, 1+0.1*strength)),
            transforms.RandomHorizontalFlip(p=0.5)
        ])

    def __call__(self, frames):
        """Aplica la misma transformación a todos los frames del video"""
        # Obtener una transformación aleatoria (para consistencia entre frames)
        result = []
        for frame in frames:
            result.append(self.transform(frame))
        return result

# Crear datasets con augmentación para fine-tuning
train_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='train',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"],
    transform=VideoAugmentor(strength=0.5)  # Augmentación más fuerte para fine-tuning
)

val_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='val',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=CONFIG["ft_batch_size"],  # Tamaño de batch más pequeño para fine-tuning
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

val_dataloader = DataLoader(
    val_dataset,
    batch_size=CONFIG["ft_batch_size"],
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

# 3. Configurar optimizador con learning rate diferenciado y weight decay discriminativo
# Agrupar parámetros con diferentes LRs y weight decay
param_groups = [
    # Capas base (backbone) - LR más bajo, WD más alto
    {
        'params': [p for n, p in model.named_parameters()
                  if 'timesformer.encoder.layer' in n and int(n.split('.')[3]) < 8],
        'lr': CONFIG["ft_learning_rate"] * 0.05,
        'weight_decay': CONFIG["ft_weight_decay"] * 2.0
    },
    # Capas intermedias - LR medio, WD estándar
    {
        'params': [p for n, p in model.named_parameters()
                  if 'timesformer.encoder.layer' in n and int(n.split('.')[3]) >= 8],
        'lr': CONFIG["ft_learning_rate"] * 0.1,
        'weight_decay': CONFIG["ft_weight_decay"]
    },
    # Embeddings de tiempo - LR más alto, WD bajo
    {
        'params': [p for n, p in model.named_parameters() if 'time_embeddings' in n],
        'lr': CONFIG["ft_learning_rate"] * 0.5,
        'weight_decay': CONFIG["ft_weight_decay"] * 0.5
    },
    # Clasificador - LR más alto, WD estándar
    {
        'params': [p for n, p in model.named_parameters() if 'classifier' in n],
        'lr': CONFIG["ft_learning_rate"],
        'weight_decay': CONFIG["ft_weight_decay"]
    }
]

optimizer = optim.AdamW(param_groups)

# 4. Scheduler con warm-up y cosine decay
num_training_steps = len(train_dataloader) * CONFIG["ft_num_epochs"]
num_warmup_steps = int(num_training_steps * 0.1)  # 10% del total como warm-up

scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps
)

# 5. Criterio de pérdida con label smoothing
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # Label smoothing para mejor generalización

# 6. Early stopping
class EarlyStopping:
    def __init__(self, patience=3, min_delta=0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_f1):
        if self.best_score is None:
            self.best_score = val_f1
        elif val_f1 < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = val_f1
            self.counter = 0

early_stopping = EarlyStopping(patience=3)

# 7. Inicializar tracking de métricas
best_val_f1 = 0.0
train_metrics = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'specificity': []}
val_metrics = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'specificity': []}

# Función para guardar todas las métricas y visualizaciones
def save_complete_metrics(eval_results, config, phase='fine_tuning'):
    """Guarda y visualiza todas las métricas requeridas"""
    # 1. Guardar informe detallado como JSON
    report = {
        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'phase': phase,
        'metrics': {
            'loss': float(eval_results['loss']),
            'accuracy': float(eval_results['accuracy']),
            'precision': float(eval_results['precision']),
            'recall': float(eval_results['recall']),
            'specificity': float(eval_results['specificity']),
            'f1_score': float(eval_results['f1']),
            'roc_auc': float(eval_results['roc_auc']),
            'true_positive_rate': float(eval_results['recall']),  # TPR = Recall
            'false_positive_rate': 1 - float(eval_results['specificity']),  # FPR = 1 - Specificity
        },
        'confusion_matrix': eval_results['confusion_matrix'].tolist(),
    }

    # Guardar informe en formato JSON
    with open(os.path.join(config["output_dir"], f"evaluation_report_{phase}.json"), 'w') as f:
        json.dump(report, f, indent=4)

    # 2. Matriz de confusión
    plt.figure(figsize=(10, 8))
    cm = eval_results['confusion_matrix']
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['No Violencia', 'Violencia'],
                yticklabels=['No Violencia', 'Violencia'])
    plt.xlabel('Predicción')
    plt.ylabel('Real')
    plt.title('Matriz de Confusión')
    plt.tight_layout()
    plt.savefig(os.path.join(config["output_dir"], f"confusion_matrix_{phase}.png"))
    plt.close()

    # 3. Curva ROC
    plt.figure(figsize=(10, 8))
    plt.plot(eval_results['fpr'], eval_results['tpr'], color='darkorange', lw=2,
             label=f'ROC curve (area = {eval_results["roc_auc"]:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.savefig(os.path.join(config["output_dir"], f"roc_curve_{phase}.png"))
    plt.close()

    # 4. Curva Precision-Recall
    precision, recall, _ = precision_recall_curve(
        eval_results['labels'],
        eval_results['predictions']
    )
    pr_auc = average_precision_score(eval_results['labels'], eval_results['predictions'])

    plt.figure(figsize=(10, 8))
    plt.plot(recall, precision, color='blue', lw=2, label=f'PR curve (AP = {pr_auc:.2f})')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend(loc="lower left")
    plt.grid(True)
    plt.savefig(os.path.join(config["output_dir"], f"precision_recall_curve_{phase}.png"))
    plt.close()

    # 5. Gráfico de barras de métricas principales
    plt.figure(figsize=(12, 6))
    metrics = ['Accuracy', 'Precision', 'Recall (TPR)', 'Specificity', 'F1-Score', 'ROC AUC']
    values = [
        report['metrics']['accuracy'],
        report['metrics']['precision'],
        report['metrics']['recall'],
        report['metrics']['specificity'],
        report['metrics']['f1_score'],
        report['metrics']['roc_auc']
    ]

    colors = ['blue', 'green', 'red', 'purple', 'orange', 'teal']
    plt.bar(metrics, values, color=colors)
    plt.ylim([0, 1.05])
    plt.ylabel('Valor')
    plt.title('Resumen de Métricas de Rendimiento')
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Añadir valores en las barras
    for i, v in enumerate(values):
        plt.text(i, v + 0.02, f"{v:.4f}", ha='center')

    plt.tight_layout()
    plt.savefig(os.path.join(config["output_dir"], f"metrics_summary_{phase}.png"))
    plt.close()

    # 6. Crear informe en texto para fácil lectura
    with open(os.path.join(config["output_dir"], f"evaluation_report_{phase}.txt"), 'w') as f:
        f.write(f"=== REPORTE COMPLETO DE EVALUACIÓN - {phase.upper()} ===\n")
        f.write(f"Fecha: {report['timestamp']}\n\n")

        f.write("=== MÉTRICAS DE RENDIMIENTO ===\n")
        f.write(f"• Accuracy: {report['metrics']['accuracy']:.4f}\n")
        f.write(f"• Precision: {report['metrics']['precision']:.4f}\n")
        f.write(f"• Recall (Sensibilidad / TPR): {report['metrics']['recall']:.4f}\n")
        f.write(f"• Specificity: {report['metrics']['specificity']:.4f}\n")
        f.write(f"• False Positive Rate (FPR): {report['metrics']['false_positive_rate']:.4f}\n")
        f.write(f"• F1-Score: {report['metrics']['f1_score']:.4f}\n")
        f.write(f"• ROC AUC: {report['metrics']['roc_auc']:.4f}\n")
        f.write(f"• Precision-Recall AUC: {pr_auc:.4f}\n\n")

        f.write("=== MATRIZ DE CONFUSIÓN ===\n")
        f.write("                   | Pred: No Violencia | Pred: Violencia |\n")
        f.write(f"Real: No Violencia | {cm[0][0]:^18} | {cm[0][1]:^15} |\n")
        f.write(f"Real: Violencia    | {cm[1][0]:^18} | {cm[1][1]:^15} |\n\n")

        f.write("=== INTERPRETACIÓN ===\n")
        acc_quality = "EXCELENTE" if report['metrics']['accuracy'] > 0.9 else "BUENO" if report['metrics']['accuracy'] > 0.8 else "REGULAR"
        f1_quality = "EXCELENTE" if report['metrics']['f1_score'] > 0.9 else "BUENO" if report['metrics']['f1_score'] > 0.8 else "REGULAR"

        f.write(f"• Calidad del modelo (Accuracy): {acc_quality}\n")
        f.write(f"• Calidad del modelo (F1-Score): {f1_quality}\n")
        f.write(f"• Equilibrio Precision-Recall: {min(report['metrics']['precision'], report['metrics']['recall'])/max(report['metrics']['precision'], report['metrics']['recall']):.2f}\n")

        # Análisis de errores
        if cm[0][1] > cm[1][0]:
            f.write("• El modelo tiende a generar más falsos positivos (clasificar incorrectamente como violencia)\n")
        elif cm[0][1] < cm[1][0]:
            f.write("• El modelo tiende a generar más falsos negativos (no detectar violencia real)\n")
        else:
            f.write("• El modelo es equilibrado en sus errores\n")

    return report

# 8. Entrenamiento por épocas
for epoch in range(CONFIG["ft_num_epochs"]):
    logger.info(f"Iniciando época {epoch+1}/{CONFIG['ft_num_epochs']} (fine-tuning)")
    print(f"Iniciando época {epoch+1}/{CONFIG['ft_num_epochs']} (fine-tuning)")

    # Entrenamiento
    train_results = train_epoch(
        model=model,
        dataloader=train_dataloader,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=criterion,
        device=device,
        epoch=epoch,
        config=CONFIG
    )

    # Evaluación
    eval_results = evaluate(
        model=model,
        dataloader=val_dataloader,
        criterion=criterion,
        device=device,
        config=CONFIG
    )

    # Registrar métricas
    for metric in ['loss', 'accuracy', 'precision', 'recall', 'f1', 'specificity']:
        if metric in train_results:
            train_metrics[metric].append(train_results[metric])
        if metric in eval_results:
            val_metrics[metric].append(eval_results[metric])

    # Mostrar resultados
    logger.info(f"Epoch {epoch+1}/{CONFIG['ft_num_epochs']} (FT) - "
               f"Train Loss: {train_results['loss']:.4f}, "
               f"Val Loss: {eval_results['loss']:.4f}, "
               f"Train Acc: {train_results['accuracy']:.4f}, "
               f"Val Acc: {eval_results['accuracy']:.4f}, "
               f"Val F1: {eval_results['f1']:.4f}, "
               f"Val Specificity: {eval_results['specificity']:.4f}")
    print(f"Epoch {epoch+1}/{CONFIG['ft_num_epochs']} (FT) - "
          f"Train Loss: {train_results['loss']:.4f}, "
          f"Val Loss: {eval_results['loss']:.4f}, "
          f"Train Acc: {train_results['accuracy']:.4f}, "
          f"Val Acc: {eval_results['accuracy']:.4f}, "
          f"Val F1: {eval_results['f1']:.4f}, "
          f"Val Specificity: {eval_results['specificity']:.4f}")

    # Guardar mejor modelo
    if eval_results['f1'] > best_val_f1:
        best_val_f1 = eval_results['f1']

        # Guardar modelo
        model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt")
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
            'val_f1': best_val_f1,
            'config': CONFIG,
        }, model_path)

        logger.info(f"Guardado mejor modelo (FT) con F1: {best_val_f1:.4f} en {model_path}")
        print(f"Guardado mejor modelo (FT) con F1: {best_val_f1:.4f} en {model_path}")

    # Guardar checkpoint al final de cada época
    checkpoint_path = os.path.join(CONFIG["output_dir"], f"checkpoint_ft_epoch{epoch+1}.pt")
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
        'train_metrics': train_metrics,
        'val_metrics': val_metrics,
    }, checkpoint_path)

    logger.info(f"Guardado checkpoint de fine-tuning época {epoch+1} en {checkpoint_path}")
    print(f"Guardado checkpoint de fine-tuning época {epoch+1} en {checkpoint_path}")

    # Early stopping
    early_stopping(eval_results['f1'])
    if early_stopping.early_stop:
        logger.info(f"Early stopping activado en época {epoch+1}")
        print(f"Early stopping activado en época {epoch+1}")
        break



Iniciando fase de Fine-Tuning
Creando y cargando modelo desde checkpoint
Parámetros entrenables: 121,260,290 / 121,260,290 (100.00%)


preprocessor_config.json:   0%|          | 0.00/412 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Cargados 8000 videos para split 'train'
Violencia: 4000, No Violencia: 4000
Cargados 1500 videos para split 'val'
Violencia: 750, No Violencia: 750
Iniciando época 1/10 (fine-tuning)


Época 1:  20%|█▉        | 199/1000 [10:52<30:51,  2.31s/it, loss=0.0106, acc=1, prec=1, rec=1, f1=1]        ERROR:__main__:Error en paso 199, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  40%|███▉      | 399/1000 [18:48<21:51,  2.18s/it, loss=0.0996, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  60%|█████▉    | 599/1000 [26:32<18:07,  2.71s/it, loss=0.127, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 599, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  80%|███████▉  | 799/1000 [34:36<08:51,  2.64s/it, loss=0.0321, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 799, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1: 100%|█████████▉| 999/1000 [43:19<00:02,  2.65s/it, loss=0.0394, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:E

Epoch 1/10 (FT) - Train Loss: 0.1266, Val Loss: 0.1082, Train Acc: 0.9484, Val Acc: 0.9193, Val F1: 0.9126, Val Specificity: 0.9960
Guardado mejor modelo (FT) con F1: 0.9126 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft.pt
Guardado checkpoint de fine-tuning época 1 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch1.pt
Iniciando época 2/10 (fine-tuning)


Época 2:  20%|█▉        | 199/1000 [05:42<22:43,  1.70s/it, loss=0.39, acc=0.75, prec=0.8, rec=0.8, f1=0.8]  ERROR:__main__:Error en paso 199, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  40%|███▉      | 399/1000 [11:23<17:03,  1.70s/it, loss=0.0287, acc=0.875, prec=0.833, rec=1, f1=0.909]ERROR:__main__:Error en paso 399, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  60%|█████▉    | 599/1000 [17:05<11:25,  1.71s/it, loss=0.0894, acc=0.875, prec=0.75, rec=1, f1=0.857]ERROR:__main__:Error en paso 599, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  80%|███████▉  | 799/1000 [22:46<05:43,  1.71s/it, loss=0.101, acc=0.875, prec=1, rec=0.75, f1=0.857]ERROR:__main__:Error en paso 799, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2: 100%|█████████▉| 999/1000 [28:27<00:01,  1.71s/it, loss=0.00208, acc=1, p

Epoch 2/10 (FT) - Train Loss: 0.0910, Val Loss: 0.0777, Train Acc: 0.9651, Val Acc: 0.9573, Val F1: 0.9558, Val Specificity: 0.9920
Guardado mejor modelo (FT) con F1: 0.9558 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft.pt
Guardado checkpoint de fine-tuning época 2 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch2.pt
Iniciando época 3/10 (fine-tuning)


Época 3:  20%|█▉        | 199/1000 [06:04<22:47,  1.71s/it, loss=0.0137, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 199, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  40%|███▉      | 399/1000 [11:45<17:03,  1.70s/it, loss=0.0169, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 399, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  60%|█████▉    | 599/1000 [17:25<11:23,  1.70s/it, loss=0.00141, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 599, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  80%|███████▉  | 799/1000 [23:06<05:41,  1.70s/it, loss=0.0707, acc=0.875, prec=0.833, rec=1, f1=0.909]ERROR:__main__:Error en paso 799, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3: 100%|█████████▉| 999/1000 [28:47<00:01,  1.71s/it, loss=0.00135, acc=1, prec=1, rec=1, f1=1]ERROR:__m

Epoch 3/10 (FT) - Train Loss: 0.0600, Val Loss: 0.0611, Train Acc: 0.9764, Val Acc: 0.9620, Val F1: 0.9608, Val Specificity: 0.9933
Guardado mejor modelo (FT) con F1: 0.9608 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft.pt
Guardado checkpoint de fine-tuning época 3 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch3.pt
Iniciando época 4/10 (fine-tuning)


Época 4:  20%|█▉        | 199/1000 [06:03<22:47,  1.71s/it, loss=0.00028, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 199, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  40%|███▉      | 399/1000 [11:44<17:06,  1.71s/it, loss=0.00421, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  60%|█████▉    | 599/1000 [17:25<11:21,  1.70s/it, loss=0.00444, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 599, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  80%|███████▉  | 799/1000 [23:06<05:43,  1.71s/it, loss=0.00286, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 799, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4: 100%|█████████▉| 999/1000 [28:47<00:01,  1.70s/it, loss=0.00583, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Err

Epoch 4/10 (FT) - Train Loss: 0.0396, Val Loss: 0.0510, Train Acc: 0.9859, Val Acc: 0.9700, Val F1: 0.9693, Val Specificity: 0.9933
Guardado mejor modelo (FT) con F1: 0.9693 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft.pt
Guardado checkpoint de fine-tuning época 4 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch4.pt
Iniciando época 5/10 (fine-tuning)


Época 5:  20%|█▉        | 199/1000 [06:03<22:48,  1.71s/it, loss=0.0108, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 199, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5:  40%|███▉      | 399/1000 [11:44<17:06,  1.71s/it, loss=0.000839, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5:  60%|█████▉    | 599/1000 [17:25<11:24,  1.71s/it, loss=0.00257, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 599, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5:  80%|███████▉  | 799/1000 [23:06<05:43,  1.71s/it, loss=0.00013, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 799, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5: 100%|█████████▉| 999/1000 [28:47<00:01,  1.70s/it, loss=0.00109, acc=1, prec=1, rec=1, f1=1]        ERROR:__ma

Epoch 5/10 (FT) - Train Loss: 0.0274, Val Loss: 0.0457, Train Acc: 0.9899, Val Acc: 0.9727, Val F1: 0.9720, Val Specificity: 0.9960
Guardado mejor modelo (FT) con F1: 0.9720 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft.pt
Guardado checkpoint de fine-tuning época 5 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch5.pt
Iniciando época 6/10 (fine-tuning)


Época 6:  20%|█▉        | 199/1000 [06:03<22:41,  1.70s/it, loss=0.00243, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 199, época 6: cannot access local variable 'loss' where it is not associated with a value
Época 6:  40%|███▉      | 399/1000 [11:43<17:05,  1.71s/it, loss=0.00228, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 399, época 6: cannot access local variable 'loss' where it is not associated with a value
Época 6:  60%|█████▉    | 599/1000 [17:24<11:21,  1.70s/it, loss=0.561, acc=0.875, prec=1, rec=0.8, f1=0.889]ERROR:__main__:Error en paso 599, época 6: cannot access local variable 'loss' where it is not associated with a value
Época 6:  80%|███████▉  | 799/1000 [23:05<05:43,  1.71s/it, loss=0.00112, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 799, época 6: cannot access local variable 'loss' where it is not associated with a value
Época 6: 100%|█████████▉| 999/1000 [28:46<00:01,  1.71s/it, loss=0.000837, acc=1, prec=1, rec=1, f1=1]ERROR:__

Epoch 6/10 (FT) - Train Loss: 0.0196, Val Loss: 0.0398, Train Acc: 0.9941, Val Acc: 0.9787, Val F1: 0.9783, Val Specificity: 0.9960
Guardado mejor modelo (FT) con F1: 0.9783 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft.pt
Guardado checkpoint de fine-tuning época 6 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch6.pt
Iniciando época 7/10 (fine-tuning)


Época 7:  20%|█▉        | 199/1000 [06:05<22:41,  1.70s/it, loss=0.000409, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 199, época 7: cannot access local variable 'loss' where it is not associated with a value
Época 7:  40%|███▉      | 399/1000 [11:46<17:07,  1.71s/it, loss=0.0102, acc=1, prec=1, rec=1, f1=1]  ERROR:__main__:Error en paso 399, época 7: cannot access local variable 'loss' where it is not associated with a value
Época 7:  55%|█████▌    | 550/1000 [16:02<12:45,  1.70s/it, loss=0.000943, acc=1, prec=1, rec=1, f1=1]

In [None]:
# ============================== FINE TUNING ==============================

logger.info("Iniciando fase de Fine-Tuning")
print("Iniciando fase de Fine-Tuning")

# Cargar el mejor modelo de Transfer Learning
best_tl_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_tl.pt")
checkpoint = torch.load(best_tl_model_path)

# Comprobar si ya tenemos el modelo cargado (de la celda anterior) o necesitamos cargarlo
try:
    # Intentar acceder al modelo, si no está definido, lanzará una excepción
    model
    logger.info("Usando modelo ya cargado de celda anterior")
    print("Usando modelo ya cargado de celda anterior")
    # Cargar estado del modelo desde checkpoint
    model.load_state_dict(checkpoint['model_state_dict'])
except NameError:
    # Si el modelo no está definido, crear uno nuevo y cargarlo
    logger.info("Creando y cargando modelo desde checkpoint")
    print("Creando y cargando modelo desde checkpoint")
    # model = TimesformerForVideoClassification.from_pretrained(
    #     CONFIG["pretrained_model"],
    #     num_frames=CONFIG["num_frames"],
    #     image_size=CONFIG["image_size"],
    #     num_labels=CONFIG["num_classes"],
    #     # ignore_mismatched_sizes=True
    # )

    # Cargar la configuración del modelo pre-entrenado y modificarla para 2 clases
    config = TimesformerConfig.from_pretrained(
        CONFIG["pretrained_model"],
        num_frames=CONFIG["num_frames"],
        image_size=CONFIG["image_size"],
        num_labels=CONFIG["num_classes"], # Asegura que la config especifique 2 clases
    )

    # Crear una instancia del modelo *con la configuración de 2 clases*
    model = TimesformerForVideoClassification(config)

    # Cargar estado del modelo desde checkpoint
    # model.load_state_dict(checkpoint['model_state_dict'])

    # Ahora cargar el state_dict del checkpoint.
    # Como la arquitectura ya coincide (capa de 2 clases), strict=True debería funcionar.
    # El checkpoint contiene el estado COMPLETO del modelo después del TL.
    model.load_state_dict(checkpoint['model_state_dict'], strict=True)

# 1. Descongelar todos los parámetros del modelo
for param in model.parameters():
    param.requires_grad = True

# Verificar parámetros entrenables
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
logger.info(f"Parámetros entrenables: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")
print(f"Parámetros entrenables: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")

# Asegurar que el modelo está en el dispositivo correcto
model.to(device)

# 2. Preparar datasets y dataloaders con augmentación adicional para fine-tuning
# Definir transformaciones para data augmentation más agresivas
class VideoAugmentor:
    """Aplica augmentación a nivel de frame para videos"""
    def __init__(self, strength=0.3):
        self.transform = transforms.Compose([
            transforms.ColorJitter(brightness=0.2*strength,
                                  contrast=0.2*strength,
                                  saturation=0.2*strength,
                                  hue=0.1*strength),
            transforms.RandomAffine(degrees=5*strength,
                                   translate=(0.1*strength, 0.1*strength),
                                   scale=(1-0.1*strength, 1+0.1*strength)),
            transforms.RandomHorizontalFlip(p=0.5)
        ])

    def __call__(self, frames):
        """Aplica la misma transformación a todos los frames del video"""
        # Obtener una transformación aleatoria (para consistencia entre frames)
        result = []
        for frame in frames:
            result.append(self.transform(frame))
        return result

# Crear datasets con augmentación para fine-tuning
train_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='train',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"],
    transform=VideoAugmentor(strength=0.5)  # Augmentación más fuerte para fine-tuning
)

val_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='val',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=CONFIG["ft_batch_size"],  # Tamaño de batch más pequeño para fine-tuning
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

val_dataloader = DataLoader(
    val_dataset,
    batch_size=CONFIG["ft_batch_size"],
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

# 3. Configurar optimizador con learning rate diferenciado y weight decay discriminativo
# Agrupar parámetros con diferentes LRs y weight decay
param_groups = [
    # Capas base (backbone) - LR más bajo, WD más alto
    {
        'params': [p for n, p in model.named_parameters()
                  if 'timesformer.encoder.layer' in n and int(n.split('.')[3]) < 8],
        'lr': CONFIG["ft_learning_rate"] * 0.05,
        'weight_decay': CONFIG["ft_weight_decay"] * 2.0
    },
    # Capas intermedias - LR medio, WD estándar
    {
        'params': [p for n, p in model.named_parameters()
                  if 'timesformer.encoder.layer' in n and int(n.split('.')[3]) >= 8],
        'lr': CONFIG["ft_learning_rate"] * 0.1,
        'weight_decay': CONFIG["ft_weight_decay"]
    },
    # Embeddings de tiempo - LR más alto, WD bajo
    {
        'params': [p for n, p in model.named_parameters() if 'time_embeddings' in n],
        'lr': CONFIG["ft_learning_rate"] * 0.5,
        'weight_decay': CONFIG["ft_weight_decay"] * 0.5
    },
    # Clasificador - LR más alto, WD estándar
    {
        'params': [p for n, p in model.named_parameters() if 'classifier' in n],
        'lr': CONFIG["ft_learning_rate"],
        'weight_decay': CONFIG["ft_weight_decay"]
    }
]

optimizer = optim.AdamW(param_groups)

# 4. Scheduler con warm-up y cosine decay
num_training_steps = len(train_dataloader) * CONFIG["ft_num_epochs"]
num_warmup_steps = int(num_training_steps * 0.1)  # 10% del total como warm-up

scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps
)

# 5. Criterio de pérdida con label smoothing
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # Label smoothing para mejor generalización

# 6. Early stopping
class EarlyStopping:
    def __init__(self, patience=3, min_delta=0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_f1):
        if self.best_score is None:
            self.best_score = val_f1
        elif val_f1 < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = val_f1
            self.counter = 0

early_stopping = EarlyStopping(patience=3)

# 7. Inicializar tracking de métricas
best_val_f1 = 0.0
train_metrics = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'specificity': []}
val_metrics = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'specificity': []}

# Función para guardar todas las métricas y visualizaciones
def save_complete_metrics(eval_results, config, phase='fine_tuning'):
    """Guarda y visualiza todas las métricas requeridas"""
    # 1. Guardar informe detallado como JSON
    report = {
        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'phase': phase,
        'metrics': {
            'loss': float(eval_results['loss']),
            'accuracy': float(eval_results['accuracy']),
            'precision': float(eval_results['precision']),
            'recall': float(eval_results['recall']),
            'specificity': float(eval_results['specificity']),
            'f1_score': float(eval_results['f1']),
            'roc_auc': float(eval_results['roc_auc']),
            'true_positive_rate': float(eval_results['recall']),  # TPR = Recall
            'false_positive_rate': 1 - float(eval_results['specificity']),  # FPR = 1 - Specificity
        },
        'confusion_matrix': eval_results['confusion_matrix'].tolist(),
    }

    # Guardar informe en formato JSON
    with open(os.path.join(config["output_dir"], f"evaluation_report_{phase}.json"), 'w') as f:
        json.dump(report, f, indent=4)

    # 2. Matriz de confusión
    plt.figure(figsize=(10, 8))
    cm = eval_results['confusion_matrix']
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['No Violencia', 'Violencia'],
                yticklabels=['No Violencia', 'Violencia'])
    plt.xlabel('Predicción')
    plt.ylabel('Real')
    plt.title('Matriz de Confusión')
    plt.tight_layout()
    plt.savefig(os.path.join(config["output_dir"], f"confusion_matrix_{phase}.png"))
    plt.close()

    # 3. Curva ROC
    plt.figure(figsize=(10, 8))
    plt.plot(eval_results['fpr'], eval_results['tpr'], color='darkorange', lw=2,
             label=f'ROC curve (area = {eval_results["roc_auc"]:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.savefig(os.path.join(config["output_dir"], f"roc_curve_{phase}.png"))
    plt.close()

    # 4. Curva Precision-Recall
    precision, recall, _ = precision_recall_curve(
        eval_results['labels'],
        eval_results['predictions']
    )
    pr_auc = average_precision_score(eval_results['labels'], eval_results['predictions'])

    plt.figure(figsize=(10, 8))
    plt.plot(recall, precision, color='blue', lw=2, label=f'PR curve (AP = {pr_auc:.2f})')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend(loc="lower left")
    plt.grid(True)
    plt.savefig(os.path.join(config["output_dir"], f"precision_recall_curve_{phase}.png"))
    plt.close()

    # 5. Gráfico de barras de métricas principales
    plt.figure(figsize=(12, 6))
    metrics = ['Accuracy', 'Precision', 'Recall (TPR)', 'Specificity', 'F1-Score', 'ROC AUC']
    values = [
        report['metrics']['accuracy'],
        report['metrics']['precision'],
        report['metrics']['recall'],
        report['metrics']['specificity'],
        report['metrics']['f1_score'],
        report['metrics']['roc_auc']
    ]

    colors = ['blue', 'green', 'red', 'purple', 'orange', 'teal']
    plt.bar(metrics, values, color=colors)
    plt.ylim([0, 1.05])
    plt.ylabel('Valor')
    plt.title('Resumen de Métricas de Rendimiento')
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Añadir valores en las barras
    for i, v in enumerate(values):
        plt.text(i, v + 0.02, f"{v:.4f}", ha='center')

    plt.tight_layout()
    plt.savefig(os.path.join(config["output_dir"], f"metrics_summary_{phase}.png"))
    plt.close()

    # 6. Crear informe en texto para fácil lectura
    with open(os.path.join(config["output_dir"], f"evaluation_report_{phase}.txt"), 'w') as f:
        f.write(f"=== REPORTE COMPLETO DE EVALUACIÓN - {phase.upper()} ===\n")
        f.write(f"Fecha: {report['timestamp']}\n\n")

        f.write("=== MÉTRICAS DE RENDIMIENTO ===\n")
        f.write(f"• Accuracy: {report['metrics']['accuracy']:.4f}\n")
        f.write(f"• Precision: {report['metrics']['precision']:.4f}\n")
        f.write(f"• Recall (Sensibilidad / TPR): {report['metrics']['recall']:.4f}\n")
        f.write(f"• Specificity: {report['metrics']['specificity']:.4f}\n")
        f.write(f"• False Positive Rate (FPR): {report['metrics']['false_positive_rate']:.4f}\n")
        f.write(f"• F1-Score: {report['metrics']['f1_score']:.4f}\n")
        f.write(f"• ROC AUC: {report['metrics']['roc_auc']:.4f}\n")
        f.write(f"• Precision-Recall AUC: {pr_auc:.4f}\n\n")

        f.write("=== MATRIZ DE CONFUSIÓN ===\n")
        f.write("                   | Pred: No Violencia | Pred: Violencia |\n")
        f.write(f"Real: No Violencia | {cm[0][0]:^18} | {cm[0][1]:^15} |\n")
        f.write(f"Real: Violencia    | {cm[1][0]:^18} | {cm[1][1]:^15} |\n\n")

        f.write("=== INTERPRETACIÓN ===\n")
        acc_quality = "EXCELENTE" if report['metrics']['accuracy'] > 0.9 else "BUENO" if report['metrics']['accuracy'] > 0.8 else "REGULAR"
        f1_quality = "EXCELENTE" if report['metrics']['f1_score'] > 0.9 else "BUENO" if report['metrics']['f1_score'] > 0.8 else "REGULAR"

        f.write(f"• Calidad del modelo (Accuracy): {acc_quality}\n")
        f.write(f"• Calidad del modelo (F1-Score): {f1_quality}\n")
        f.write(f"• Equilibrio Precision-Recall: {min(report['metrics']['precision'], report['metrics']['recall'])/max(report['metrics']['precision'], report['metrics']['recall']):.2f}\n")

        # Análisis de errores
        if cm[0][1] > cm[1][0]:
            f.write("• El modelo tiende a generar más falsos positivos (clasificar incorrectamente como violencia)\n")
        elif cm[0][1] < cm[1][0]:
            f.write("• El modelo tiende a generar más falsos negativos (no detectar violencia real)\n")
        else:
            f.write("• El modelo es equilibrado en sus errores\n")

    return report

# 8. Entrenamiento por épocas
for epoch in range(CONFIG["ft_num_epochs"]):
    logger.info(f"Iniciando época {epoch+1}/{CONFIG['ft_num_epochs']} (fine-tuning)")
    print(f"Iniciando época {epoch+1}/{CONFIG['ft_num_epochs']} (fine-tuning)")

    # Entrenamiento
    train_results = train_epoch(
        model=model,
        dataloader=train_dataloader,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=criterion,
        device=device,
        epoch=epoch,
        config=CONFIG
    )

    # Evaluación
    eval_results = evaluate(
        model=model,
        dataloader=val_dataloader,
        criterion=criterion,
        device=device,
        config=CONFIG
    )

    # Registrar métricas
    for metric in ['loss', 'accuracy', 'precision', 'recall', 'f1', 'specificity']:
        if metric in train_results:
            train_metrics[metric].append(train_results[metric])
        if metric in eval_results:
            val_metrics[metric].append(eval_results[metric])

    # Mostrar resultados
    logger.info(f"Epoch {epoch+1}/{CONFIG['ft_num_epochs']} (FT) - "
               f"Train Loss: {train_results['loss']:.4f}, "
               f"Val Loss: {eval_results['loss']:.4f}, "
               f"Train Acc: {train_results['accuracy']:.4f}, "
               f"Val Acc: {eval_results['accuracy']:.4f}, "
               f"Val F1: {eval_results['f1']:.4f}, "
               f"Val Specificity: {eval_results['specificity']:.4f}")
    print(f"Epoch {epoch+1}/{CONFIG['ft_num_epochs']} (FT) - "
          f"Train Loss: {train_results['loss']:.4f}, "
          f"Val Loss: {eval_results['loss']:.4f}, "
          f"Train Acc: {train_results['accuracy']:.4f}, "
          f"Val Acc: {eval_results['accuracy']:.4f}, "
          f"Val F1: {eval_results['f1']:.4f}, "
          f"Val Specificity: {eval_results['specificity']:.4f}")

    # Guardar mejor modelo
    if eval_results['f1'] > best_val_f1:
        best_val_f1 = eval_results['f1']

        # Guardar modelo
        model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft2.pt")
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
            'val_f1': best_val_f1,
            'config': CONFIG,
        }, model_path)

        logger.info(f"Guardado mejor modelo (FT) con F1: {best_val_f1:.4f} en {model_path}")
        print(f"Guardado mejor modelo (FT) con F1: {best_val_f1:.4f} en {model_path}")

    # Guardar checkpoint al final de cada época
    checkpoint_path = os.path.join(CONFIG["output_dir"], f"checkpoint_ft_epoch{epoch+1}_2.pt")
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
        'train_metrics': train_metrics,
        'val_metrics': val_metrics,
    }, checkpoint_path)

    logger.info(f"Guardado checkpoint de fine-tuning época {epoch+1} en {checkpoint_path}")
    print(f"Guardado checkpoint de fine-tuning época {epoch+1} en {checkpoint_path}")

    # Early stopping
    early_stopping(eval_results['f1'])
    if early_stopping.early_stop:
        logger.info(f"Early stopping activado en época {epoch+1}")
        print(f"Early stopping activado en época {epoch+1}")
        break



Iniciando fase de Fine-Tuning
Creando y cargando modelo desde checkpoint


config.json:   0%|          | 0.00/22.7k [00:00<?, ?B/s]

Parámetros entrenables: 121,260,290 / 121,260,290 (100.00%)


preprocessor_config.json:   0%|          | 0.00/412 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Cargados 8000 videos para split 'train'
Violencia: 4000, No Violencia: 4000
Cargados 1500 videos para split 'val'
Violencia: 750, No Violencia: 750
Iniciando época 1/5 (fine-tuning)


Época 1:  20%|█▉        | 199/1000 [04:34<17:22,  1.30s/it, loss=0.01, acc=1, prec=1, rec=1, f1=1]           ERROR:__main__:Error en paso 199, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  40%|███▉      | 399/1000 [08:15<12:05,  1.21s/it, loss=0.0917, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  60%|█████▉    | 599/1000 [12:03<07:56,  1.19s/it, loss=0.116, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 599, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1:  80%|███████▉  | 799/1000 [15:51<04:23,  1.31s/it, loss=0.0251, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 799, época 1: cannot access local variable 'loss' where it is not associated with a value
Época 1: 100%|█████████▉| 999/1000 [19:38<00:01,  1.68s/it, loss=0.0286, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:E

Epoch 1/5 (FT) - Train Loss: 0.1218, Val Loss: 0.0993, Train Acc: 0.9499, Val Acc: 0.9280, Val F1: 0.9227, Val Specificity: 0.9960
Guardado mejor modelo (FT) con F1: 0.9227 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft2.pt
Guardado checkpoint de fine-tuning época 1 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch1_2.pt
Iniciando época 2/5 (fine-tuning)


Época 2:  20%|█▉        | 199/1000 [02:22<09:14,  1.45it/s, loss=0.326, acc=0.75, prec=0.8, rec=0.8, f1=0.8] ERROR:__main__:Error en paso 199, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  40%|███▉      | 399/1000 [04:41<06:55,  1.45it/s, loss=0.0251, acc=0.875, prec=0.833, rec=1, f1=0.909]ERROR:__main__:Error en paso 399, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  60%|█████▉    | 599/1000 [06:59<04:37,  1.44it/s, loss=0.0831, acc=0.875, prec=0.75, rec=1, f1=0.857]ERROR:__main__:Error en paso 599, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2:  80%|███████▉  | 799/1000 [09:18<02:19,  1.44it/s, loss=0.0889, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 799, época 2: cannot access local variable 'loss' where it is not associated with a value
Época 2: 100%|█████████▉| 999/1000 [11:36<00:00,  1.45it/s, loss=0.00203, acc=1, prec=1, re

Epoch 2/5 (FT) - Train Loss: 0.0822, Val Loss: 0.0747, Train Acc: 0.9681, Val Acc: 0.9573, Val F1: 0.9558, Val Specificity: 0.9920
Guardado mejor modelo (FT) con F1: 0.9558 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft2.pt
Guardado checkpoint de fine-tuning época 2 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch2_2.pt
Iniciando época 3/5 (fine-tuning)


Época 3:  20%|█▉        | 199/1000 [02:48<09:14,  1.45it/s, loss=0.0127, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 199, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  40%|███▉      | 399/1000 [05:07<06:55,  1.45it/s, loss=0.0138, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Error en paso 399, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  60%|█████▉    | 599/1000 [07:25<04:37,  1.45it/s, loss=0.00139, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 599, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3:  80%|███████▉  | 799/1000 [09:44<02:19,  1.44it/s, loss=0.0688, acc=0.875, prec=0.833, rec=1, f1=0.909]ERROR:__main__:Error en paso 799, época 3: cannot access local variable 'loss' where it is not associated with a value
Época 3: 100%|█████████▉| 999/1000 [12:02<00:00,  1.45it/s, loss=0.0015, acc=1, prec=1, rec=1, f1=1]ERROR:__ma

Epoch 3/5 (FT) - Train Loss: 0.0578, Val Loss: 0.0650, Train Acc: 0.9775, Val Acc: 0.9600, Val F1: 0.9586, Val Specificity: 0.9933
Guardado mejor modelo (FT) con F1: 0.9586 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft2.pt
Guardado checkpoint de fine-tuning época 3 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch3_2.pt
Iniciando época 4/5 (fine-tuning)


Época 4:  20%|█▉        | 199/1000 [02:47<09:14,  1.45it/s, loss=0.000395, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 199, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  40%|███▉      | 399/1000 [05:06<06:55,  1.45it/s, loss=0.00713, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  60%|█████▉    | 599/1000 [07:24<04:37,  1.45it/s, loss=0.00531, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 599, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4:  80%|███████▉  | 799/1000 [09:43<02:19,  1.44it/s, loss=0.00629, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 799, época 4: cannot access local variable 'loss' where it is not associated with a value
Época 4: 100%|█████████▉| 999/1000 [12:01<00:00,  1.45it/s, loss=0.0112, acc=1, prec=1, rec=1, f1=1] ERROR:__main__:Err

Epoch 4/5 (FT) - Train Loss: 0.0446, Val Loss: 0.0601, Train Acc: 0.9832, Val Acc: 0.9620, Val F1: 0.9608, Val Specificity: 0.9933
Guardado mejor modelo (FT) con F1: 0.9608 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft2.pt
Guardado checkpoint de fine-tuning época 4 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch4_2.pt
Iniciando época 5/5 (fine-tuning)


Época 5:  20%|█▉        | 199/1000 [02:49<09:14,  1.45it/s, loss=0.0221, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 199, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5:  40%|███▉      | 399/1000 [05:07<06:55,  1.45it/s, loss=0.00234, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 399, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5:  60%|█████▉    | 599/1000 [07:26<04:38,  1.44it/s, loss=0.00742, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 599, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5:  80%|███████▉  | 799/1000 [09:44<02:19,  1.44it/s, loss=0.000511, acc=1, prec=1, rec=1, f1=1]ERROR:__main__:Error en paso 799, época 5: cannot access local variable 'loss' where it is not associated with a value
Época 5: 100%|█████████▉| 999/1000 [12:03<00:00,  1.45it/s, loss=0.0033, acc=1, prec=1, rec=1, f1=1]         ERROR:__mai

Epoch 5/5 (FT) - Train Loss: 0.0399, Val Loss: 0.0599, Train Acc: 0.9854, Val Acc: 0.9613, Val F1: 0.9601, Val Specificity: 0.9933
Guardado checkpoint de fine-tuning época 5 en /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/checkpoint_ft_epoch5_2.pt


In [None]:
# 9. Visualizar métricas
plt.figure(figsize=(20, 15))

metrics_to_plot = ['loss', 'accuracy', 'precision', 'recall', 'f1', 'specificity']
# Usar la longitud de val_metrics, ya que evaluate siempre calcula todas las métricas
# y val_metrics tendrá la longitud correcta en caso de Early Stopping.
epochs = range(1, len(val_metrics['loss']) + 1)

for i, metric in enumerate(metrics_to_plot):
    plt.subplot(3, 2, i+1)
    # Graficar métrica de entrenamiento SOLO SI existe en train_metrics y no está vacía
    if metric in train_metrics and train_metrics[metric]:
        # Asegurarse de tomar solo los datos hasta el número de épocas graficadas
        plt.plot(epochs, train_metrics[metric][:len(epochs)], 'b-', label=f'Training {metric}')
    # Graficar métrica de validación SIEMPRE que exista (evaluate siempre las devuelve)
    if metric in val_metrics and val_metrics[metric]:
        # Asegurarse de tomar solo los datos hasta el número de épocas graficadas
        plt.plot(epochs, val_metrics[metric][:len(epochs)], 'r-', label=f'Validation {metric}')
    else:
        # Si por alguna razón (inesperada) tampoco hay datos de validación, saltar este gráfico
        continue

    plt.title(f'{metric.capitalize()} vs. Epochs (Fine-Tuning)')
    plt.xlabel('Epochs')
    plt.ylabel(metric.capitalize())
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.savefig(os.path.join(CONFIG["output_dir"], "fine_tuning_metrics.png"))
plt.close()

# 10. Evaluación final del mejor modelo
# Cargar el mejor modelo
# Comprobar si existe el modelo 'best_ft2.pt' y si no, usar 'best_ft.pt'
best_model_path_ft2 = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft2.pt")
best_model_path_ft = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt")

if os.path.exists(best_model_path_ft2):
    best_model_path = best_model_path_ft2
    print(f"Cargando el mejor modelo desde {best_model_path_ft2}")
    logger.info(f"Cargando el mejor modelo desde {best_model_path_ft2}")
elif os.path.exists(best_model_path_ft):
    best_model_path = best_model_path_ft
    print(f"Cargando el mejor modelo desde {best_model_path_ft}")
    logger.info(f"Cargando el mejor modelo desde {best_model_path_ft}")
else:
     raise FileNotFoundError(f"No se encontró ningún modelo 'best_ft2.pt' o 'best_ft.pt' en {CONFIG['output_dir']}")


checkpoint = torch.load(best_model_path)
model.load_state_dict(checkpoint['model_state_dict'])

logger.info(f"Evaluando mejor modelo de Fine-Tuning (F1: {checkpoint['val_f1']:.4f})")
print(f"Evaluando mejor modelo de Fine-Tuning (F1: {checkpoint['val_f1']:.4f}")

final_eval_results = evaluate(
    model=model,
    dataloader=val_dataloader,
    criterion=criterion,
    device=device,
    config=CONFIG
)

# Guardar y visualizar todas las métricas requeridas
metrics_report = save_complete_metrics(final_eval_results, CONFIG, phase='fine_tuning')

# 11. Evaluar también en el conjunto de prueba para validar generalización
test_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='test',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=CONFIG["ft_batch_size"],
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

logger.info("Evaluando en conjunto de prueba para validar generalización")
print("Evaluando en conjunto de prueba para validar generalización")

test_results = evaluate(
    model=model,
    dataloader=test_dataloader,
    criterion=criterion,
    device=device,
    config=CONFIG
)

# Guardar y visualizar las métricas en el conjunto de prueba
test_metrics_report = save_complete_metrics(test_results, CONFIG, phase='test')

# 12. Visualizar algunos ejemplos de predicciones (correctas e incorrectas)
def visualize_examples(model, dataloader, config, num_examples=5, phase='test_examples'):
    """Visualiza ejemplos de predicciones para análisis cualitativo"""
    model.eval()
    correct_examples = []
    incorrect_examples = []

    with torch.no_grad():
        for batch in dataloader:
            if len(correct_examples) >= num_examples and len(incorrect_examples) >= num_examples:
                break

            pixel_values = batch['pixel_values'].to(device)
            labels = batch['labels'].to(device)
            video_paths = batch['video_path']

            outputs = model(pixel_values=pixel_values)
            logits = outputs.logits
            probs = torch.softmax(logits, dim=1)
            preds = torch.argmax(logits, dim=1)

            for i, (pred, label, video_path) in enumerate(zip(preds, labels, video_paths)):
                # Decodificar video para mostrar un frame
                try:
                    vr = VideoReader(video_path, ctx=cpu(0))
                    if len(vr) == 0:
                         logger.warning(f"Skipping visualization for empty video: {video_path}")
                         continue
                    mid_frame = vr[len(vr)//2].asnumpy()
                except Exception as e:
                    logger.error(f"Error decoding video for visualization {video_path}: {str(e)}")
                    continue # Skip this video if decoding fails


                example = {
                    'frame': mid_frame,
                    'prediction': pred.item(),
                    'true_label': label.item(),
                    'confidence': probs[i, pred].item(),
                    'video_path': video_path
                }

                if pred.item() == label.item():
                    if len(correct_examples) < num_examples:
                        correct_examples.append(example)
                else:
                    if len(incorrect_examples) < num_examples:
                        incorrect_examples.append(example)

    # Visualizar ejemplos correctos
    if correct_examples:
        fig, axes = plt.subplots(1, len(correct_examples), figsize=(20, 4))
        if len(correct_examples) == 1:
            axes = [axes]

        for i, example in enumerate(correct_examples):
            axes[i].imshow(example['frame'])
            label_text = "Violencia" if example['true_label'] == 1 else "No Violencia"
            axes[i].set_title(f"Correcto: {label_text}\nConf: {example['confidence']:.2f}")
            axes[i].axis('off')

        plt.tight_layout()
        plt.savefig(os.path.join(config["output_dir"], f"correct_predictions_{phase}.png"))
        plt.close()

    # Visualizar ejemplos incorrectos
    if incorrect_examples:
        fig, axes = plt.subplots(1, len(incorrect_examples), figsize=(20, 4))
        if len(incorrect_examples) == 1:
            axes = [axes]

        for i, example in enumerate(incorrect_examples):
            axes[i].imshow(example['frame'])
            true_label = "Violencia" if example['true_label'] == 1 else "No Violencia"
            pred_label = "Violencia" if example['prediction'] == 1 else "No Violencia"
            axes[i].set_title(f"Error\nReal: {true_label}\nPred: {pred_label}\nConf: {example['confidence']:.2f}")
            axes[i].axis('off')

        plt.tight_layout()
        plt.savefig(os.path.join(config["output_dir"], f"incorrect_predictions_{phase}.png"))
        plt.close()

# Visualizar ejemplos de predicciones
visualize_examples(model, test_dataloader, CONFIG)

logger.info("Completada fase de Fine-Tuning")
print("Completada fase de Fine-Tuning")

# Guardar resultados finales
ft_results = final_eval_results

Cargando el mejor modelo desde /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft2.pt
Evaluando mejor modelo de Fine-Tuning (F1: 0.9608


Evaluando: 100%|██████████| 188/188 [00:55<00:00,  3.37it/s]


Cargados 800 videos para split 'test'
Violencia: 400, No Violencia: 400
Evaluando en conjunto de prueba para validar generalización


Evaluando: 100%|██████████| 100/100 [02:00<00:00,  1.20s/it]


Completada fase de Fine-Tuning


In [None]:
# ============================== EVALUACIÓN EN CONJUNTO DE PRUEBA ==============================

logger.info("Evaluando modelo en conjunto de prueba")

# Cargar el mejor modelo de Fine-Tuning
best_ft_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft.pt2")

# Comprobar si ya tenemos el modelo cargado
try:
    # Intentar acceder al modelo
    model
    logger.info("Usando modelo ya cargado de celda anterior")
    # Cargar estado del mejor modelo de fine-tuning
    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
except NameError:
    # Si el modelo no está definido, crear uno nuevo y cargarlo
    logger.info("Cargando modelo desde checkpoint")
    model = TimesformerForVideoClassification.from_pretrained(
        CONFIG["pretrained_model"],
        num_frames=CONFIG["num_frames"],
        image_size=CONFIG["image_size"],
    )

    # Adaptarlo a nuestra tarea
    if model.classifier.out_features != CONFIG["num_classes"]:
        model.classifier = nn.Sequential(
            nn.Dropout(CONFIG["tl_dropout"]),
            nn.Linear(model.classifier.in_features, CONFIG["num_classes"])
        )

    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])

# Asegurar que el modelo está en el dispositivo correcto
model.to(device)
model.eval()

# Cargar dataset de prueba
test_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='test',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=CONFIG["ft_batch_size"],
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

# Evaluar
criterion = nn.CrossEntropyLoss()
test_results = evaluate(
    model=model,
    dataloader=test_dataloader,
    criterion=criterion,
    device=device,
    config=CONFIG
)

# Visualizar matriz de confusión
plot_confusion_matrix(test_results['confusion_matrix'], CONFIG, phase='test')

# Visualizar curva ROC
plot_roc_curve(
    test_results['fpr'],
    test_results['tpr'],
    test_results['roc_auc'],
    CONFIG,
    phase='test'
)

# Generar y guardar reporte detallado
save_evaluation_report(test_results, CONFIG, phase='test')

# Métricas adicionales: Precision-Recall curve
precision, recall, _ = precision_recall_curve(
    test_results['labels'],
    test_results['predictions']
)
pr_auc = average_precision_score(test_results['labels'], test_results['predictions'])

# Graficar curva Precision-Recall
plt.figure(figsize=(10, 8))
plt.plot(recall, precision, color='blue', lw=2, label=f'PR curve (AP = {pr_auc:.2f})')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend(loc="lower left")
plt.grid(True)
plt.savefig(os.path.join(CONFIG["output_dir"], "precision_recall_curve_test.png"))
plt.close()

# Análisis de mejores umbrales
# Calcular métricas para diferentes umbrales
thresholds = np.linspace(0.1, 0.9, 9)
threshold_metrics = []

for threshold in thresholds:
    binary_preds = (test_results['predictions'] >= threshold).astype(int)

    acc = accuracy_score(test_results['labels'], binary_preds)
    prec = precision_score(test_results['labels'], binary_preds, zero_division=0)
    rec = recall_score(test_results['labels'], binary_preds, zero_division=0)
    f1 = f1_score(test_results['labels'], binary_preds, zero_division=0)

    threshold_metrics.append({
        'threshold': threshold,
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1
    })

# Convertir a DataFrame para mejor visualización
threshold_df = pd.DataFrame(threshold_metrics)

# Graficar métricas vs umbral
plt.figure(figsize=(12, 8))
for metric in ['accuracy', 'precision', 'recall', 'f1']:
    plt.plot(threshold_df['threshold'], threshold_df[metric], marker='o', label=metric)

plt.xlabel('Umbral de decisión')
plt.ylabel('Valor de métrica')
plt.title('Métricas vs Umbral de decisión')
plt.legend()
plt.grid(True)
plt.savefig(os.path.join(CONFIG["output_dir"], "threshold_analysis.png"))
plt.close()

# Encontrar mejor umbral según F1
best_threshold_idx = threshold_df['f1'].idxmax()
best_threshold = threshold_df.loc[best_threshold_idx, 'threshold']

logger.info(f"Mejor umbral encontrado: {best_threshold:.2f} con F1: {threshold_df.loc[best_threshold_idx, 'f1']:.4f}")

# Guardar análisis de umbrales
threshold_df.to_csv(os.path.join(CONFIG["output_dir"], "threshold_analysis.csv"), index=False)

# Actualizar el umbral en la configuración
CONFIG["threshold"] = float(best_threshold)
with open(os.path.join(CONFIG["output_dir"], "config.json"), 'w') as f:
    json.dump(CONFIG, f, indent=4)

# Calcular y visualizar curva ROC detallada con punto óptimo
fpr, tpr, thresholds_roc = roc_curve(test_results['labels'], test_results['predictions'])
roc_auc = auc(fpr, tpr)

# Calcular distancia al punto óptimo (0,1)
distances = np.sqrt((1-tpr)**2 + fpr**2)
optimal_idx = np.argmin(distances)
optimal_threshold = thresholds_roc[optimal_idx]

plt.figure(figsize=(10, 8))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.scatter(fpr[optimal_idx], tpr[optimal_idx], marker='o', color='red',
            label=f'Punto óptimo (umbral={optimal_threshold:.2f})')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos')
plt.ylabel('Tasa de Verdaderos Positivos')
plt.title('Curva ROC con punto óptimo')
plt.legend(loc="lower right")
plt.grid(True)
plt.savefig(os.path.join(CONFIG["output_dir"], "roc_curve_optimal_test.png"))
plt.close()

logger.info(f"Umbral óptimo según distancia a punto ideal en ROC: {optimal_threshold:.4f}")

# Guardar este umbral también
with open(os.path.join(CONFIG["output_dir"], "optimal_thresholds.json"), 'w') as f:
    json.dump({
        'f1_optimal': float(best_threshold),
        'roc_optimal': float(optimal_threshold)
    }, f, indent=4)

# Mostrar resumen de resultados
logger.info(f"Resumen de evaluación en conjunto de prueba:")
logger.info(f"Accuracy: {test_results['accuracy']:.4f}")
logger.info(f"Precision: {test_results['precision']:.4f}")
logger.info(f"Recall (Sensibilidad): {test_results['recall']:.4f}")
logger.info(f"Specificity: {test_results['specificity']:.4f}")
logger.info(f"F1-Score: {test_results['f1']:.4f}")
logger.info(f"ROC AUC: {test_results['roc_auc']:.4f}")
logger.info(f"PR AUC: {pr_auc:.4f}")

Cargados 800 videos para split 'test'
Violencia: 400, No Violencia: 400


Evaluando: 100%|██████████| 200/200 [05:38<00:00,  1.69s/it]


In [None]:
!pip install onnx
!pip install onnxruntime

Collecting onnx
  Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m116.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.18.0
Collecting onnxruntime
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m118.4 MB/s[0m eta [36m0

## EXPORTACIÓN DEL MODELO

In [None]:
!pip install onnx
!pip install onnxruntime

Collecting onnx
  Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m120.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.18.0
Collecting onnxruntime
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m121.6 MB/s[0m eta [36m0

In [None]:
# Importar bibliotecas necesarias
import os
import json
import torch
import torch.nn as nn
import copy
from datetime import datetime
import logging
from transformers import TimesformerForVideoClassification, AutoImageProcessor

# Configuración básica
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Verificar disponibilidad de GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

# Configurar directorios y rutas
CONFIG = {
    "output_dir": "/content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer",
    "model_name": "timesformer_violence_detector",
    "pretrained_model": "facebook/timesformer-base-finetuned-k400",
    "num_frames": 8,
    "image_size": 224,
    "num_classes": 2,
    "threshold": 0.70,
}

# Rutas de los modelos
best_ft_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft2.pt")
exports_dir = os.path.join(CONFIG["output_dir"], "exported_models")
os.makedirs(exports_dir, exist_ok=True)

print("=== EXPORTACIÓN DEL MODELO TIMESFORMER PARA DETECCIÓN DE VIOLENCIA ===")
print(f"Cargando modelo entrenado desde: {best_ft_model_path}")

# Cargar el modelo entrenado
try:
    # Crear la instancia del modelo con la misma configuración usada en entrenamiento
    model = TimesformerForVideoClassification.from_pretrained(
        CONFIG["pretrained_model"],
        num_frames=CONFIG["num_frames"],
        image_size=CONFIG["image_size"],
        num_labels=CONFIG["num_classes"],
        ignore_mismatched_sizes=True
    )

    # Cargar los pesos del modelo entrenado
    checkpoint = torch.load(best_ft_model_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])

    # Mover a GPU si está disponible y establecer modo evaluación
    model.to(device)
    model.eval()

    print(f"Modelo cargado exitosamente. F1-Score: {checkpoint.get('val_f1', 'N/A')}")
except Exception as e:
    logger.error(f"Error al cargar el modelo: {str(e)}")
    raise

# CLASE PARA INFERENCIA OPTIMIZADA
class TimesformerOptimizedInference(nn.Module):
    """Modelo optimizado para inferencia que simplifica la salida"""
    def __init__(self, model):
        super().__init__()
        self.timesformer = model
        # Desactivar dropout para inferencia
        for module in self.modules():
            if hasattr(module, 'dropout'):
                module.dropout.p = 0

    def forward(self, pixel_values):
        self.timesformer.eval()
        with torch.no_grad():
            outputs = self.timesformer(pixel_values=pixel_values)
            logits = outputs.logits
            # Retornar probabilidades
            probs = torch.softmax(logits, dim=1)
            return probs

# 1. Guardar modelo PyTorch original
print("\n1. Guardando modelo PyTorch original...")
original_model_path = os.path.join(exports_dir, f"{CONFIG['model_name']}_original.pt")

torch.save({
    'model_state_dict': model.state_dict(),
    'config': CONFIG,
    'val_f1': checkpoint.get('val_f1', None),
    'epoch': checkpoint.get('epoch', None),
}, original_model_path)

print(f"✓ Modelo original guardado: {original_model_path}")
print(f"  Tamaño: {os.path.getsize(original_model_path) / (1024*1024):.2f} MB")

# 2. Crear modelo de inferencia optimizado
print("\n2. Creando modelo optimizado para inferencia...")
optimized_model = TimesformerOptimizedInference(model)
optimized_model.eval()

# Guardar modelo optimizado
optimized_model_path = os.path.join(exports_dir, f"{CONFIG['model_name']}_inference.pt")
torch.save(optimized_model, optimized_model_path)
print(f"✓ Modelo optimizado guardado: {optimized_model_path}")
print(f"  Tamaño: {os.path.getsize(optimized_model_path) / (1024*1024):.2f} MB")

# 3. Exportar modelo FP16 (half precision)
print("\n3. Exportando modelo en precisión FP16...")
optimized_model_fp16 = copy.deepcopy(optimized_model).half()
optimized_model_fp16.eval()

fp16_model_path = os.path.join(exports_dir, f"{CONFIG['model_name']}_fp16.pt")
torch.save(optimized_model_fp16, fp16_model_path)
print(f"✓ Modelo FP16 guardado: {fp16_model_path}")
print(f"  Tamaño: {os.path.getsize(fp16_model_path) / (1024*1024):.2f} MB")



# 4. EXPORTACIÓN A TORCHSCRIPT - OPCIÓN 1 (Con half precision)
print("\n4. Exportando a TorchScript (opción half precision)...")
try:
    # Crear un wrapper para manejar la reorganización de dimensiones
    class TSModelWrapper(nn.Module):
        def __init__(self, model):
            super().__init__()
            self.model = model

        def forward(self, x):
            # x tiene forma [batch_size, channels, num_frames, height, width]
            # Reordenar a [batch_size, num_frames, channels, height, width]
            x = x.permute(0, 2, 1, 3, 4)
            return self.model(pixel_values=x)

    # Crear el wrapper con el modelo
    ts_wrapper_half = TSModelWrapper(optimized_model_fp16)

    # Crear input de ejemplo con la forma que espera el wrapper
    example_input_half = torch.randn(
        1, 3, CONFIG["num_frames"], CONFIG["image_size"], CONFIG["image_size"],
        device=device).half()

    # Trazar modelo
    scripted_model_half = torch.jit.trace(ts_wrapper_half, example_input_half)
    scripted_model_half = torch.jit.optimize_for_inference(scripted_model_half)

    # Guardar modelo TorchScript (half)
    ts_half_path = os.path.join(exports_dir, f"{CONFIG['model_name']}_scripted_half.pt")
    torch.jit.save(scripted_model_half, ts_half_path)

    print(f"✓ Modelo TorchScript (half) guardado: {ts_half_path}")
    print(f"  Tamaño: {os.path.getsize(ts_half_path) / (1024*1024):.2f} MB")
except Exception as e:
    logger.error(f"Error al exportar a TorchScript (half): {str(e)}")
    print(f"✗ Error al exportar a TorchScript (half): {str(e)}")



# 5. EXPORTACIÓN A TORCHSCRIPT - OPCIÓN 2 (Con precisión original)
print("\n5. Exportando a TorchScript (precisión original)...")
try:
    # Crear wrapper
    ts_wrapper_float = TSModelWrapper(optimized_model)

    # Crear input de ejemplo en precisión completa
    example_input_float = torch.randn(
        1, 3, CONFIG["num_frames"], CONFIG["image_size"], CONFIG["image_size"],
        device=device)

    # Trazar modelo original
    scripted_model_float = torch.jit.trace(ts_wrapper_float, example_input_float)
    scripted_model_float = torch.jit.optimize_for_inference(scripted_model_float)

    # Guardar modelo TorchScript (float)
    ts_float_path = os.path.join(exports_dir, f"{CONFIG['model_name']}_scripted_float.pt")
    torch.jit.save(scripted_model_float, ts_float_path)

    print(f"✓ Modelo TorchScript (float) guardado: {ts_float_path}")
    print(f"  Tamaño: {os.path.getsize(ts_float_path) / (1024*1024):.2f} MB")
except Exception as e:
    logger.error(f"Error al exportar a TorchScript (float): {str(e)}")
    print(f"✗ Error al exportar a TorchScript (float): {str(e)}")




# 6. EXPORTACIÓN A ONNX - OPCIÓN 1 (Con half precision)
print("\n6. Exportando a ONNX (opción half precision)...")
try:
    # Clase wrapper para ONNX con reordenamiento de dimensiones
    class ONNXWrapperHalf(nn.Module):
        def __init__(self, model):
            super().__init__()
            self.model = model

        def forward(self, x):
            # x tiene forma [batch_size, channels, num_frames, height, width]
            # Reordenar a [batch_size, num_frames, channels, height, width]
            x = x.permute(0, 2, 1, 3, 4)
            return self.model(pixel_values=x)

    # Crear wrapper y entrada
    onnx_wrapper_half = ONNXWrapperHalf(optimized_model_fp16)
    dummy_input_half = torch.randn(
        1, 3, CONFIG["num_frames"], CONFIG["image_size"], CONFIG["image_size"],
        device=device).half()

    # Ruta para el modelo ONNX
    onnx_half_path = os.path.join(exports_dir, f"{CONFIG['model_name']}_half.onnx")

    # Exportar a ONNX
    torch.onnx.export(
        onnx_wrapper_half,
        dummy_input_half,
        onnx_half_path,
        export_params=True,
        opset_version=12,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
    )

    print(f"✓ Modelo ONNX (half) guardado: {onnx_half_path}")
    print(f"  Tamaño: {os.path.getsize(onnx_half_path) / (1024*1024):.2f} MB")
except Exception as e:
    logger.error(f"Error al exportar a ONNX (half): {str(e)}")
    print(f"✗ Error al exportar a ONNX (half): {str(e)}")




# 7. EXPORTACIÓN A ONNX - OPCIÓN 2 (Con precisión original)
print("\n7. Exportando a ONNX (precisión original)...")
try:
    # Clase wrapper para ONNX
    class ONNXWrapperFloat(nn.Module):
        def __init__(self, model):
            super().__init__()
            self.model = model

        def forward(self, x):
            # x tiene forma [batch_size, channels, num_frames, height, width]
            # Reordenar a [batch_size, num_frames, channels, height, width]
            x = x.permute(0, 2, 1, 3, 4)
            return self.model(pixel_values=x)

    # Crear wrapper y entrada
    onnx_wrapper_float = ONNXWrapperFloat(optimized_model)
    dummy_input_float = torch.randn(
        1, 3, CONFIG["num_frames"], CONFIG["image_size"], CONFIG["image_size"],
        device=device)

    # Ruta para el modelo ONNX
    onnx_float_path = os.path.join(exports_dir, f"{CONFIG['model_name']}_float.onnx")

    # Exportar a ONNX
    torch.onnx.export(
        onnx_wrapper_float,
        dummy_input_float,
        onnx_float_path,
        export_params=True,
        opset_version=12,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
    )

    print(f"✓ Modelo ONNX (float) guardado: {onnx_float_path}")
    print(f"  Tamaño: {os.path.getsize(onnx_float_path) / (1024*1024):.2f} MB")
except Exception as e:
    logger.error(f"Error al exportar a ONNX (float): {str(e)}")
    print(f"✗ Error al exportar a ONNX (float): {str(e)}")

# 8. Guardar procesador de imágenes
print("\n8. Guardando procesador de imágenes...")
processor = AutoImageProcessor.from_pretrained(CONFIG["pretrained_model"])
processor_path = os.path.join(exports_dir, "processor")
os.makedirs(processor_path, exist_ok=True)
processor.save_pretrained(processor_path)
print(f"✓ Procesador guardado en: {processor_path}")

# 9. Guardar configuración de inferencia
print("\n9. Guardando configuración de inferencia...")
inference_config = {
    "num_frames": CONFIG["num_frames"],
    "image_size": CONFIG["image_size"],
    "threshold": CONFIG["threshold"],
    "model_type": "TimesformerForVideoClassification",
    "labels": ["no_violencia", "violencia"],
    "fps": 15,
    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "formats_available": {
        "pytorch_original": os.path.basename(original_model_path),
        "pytorch_inference": os.path.basename(optimized_model_path),
        "pytorch_fp16": os.path.basename(fp16_model_path),
        "torchscript_half": os.path.basename(ts_half_path) if 'ts_half_path' in locals() else None,
        "torchscript_float": os.path.basename(ts_float_path) if 'ts_float_path' in locals() else None,
        "onnx_half": os.path.basename(onnx_half_path) if 'onnx_half_path' in locals() else None,
        "onnx_float": os.path.basename(onnx_float_path) if 'onnx_float_path' in locals() else None,
    },
    "input_format": {
        "TorchScript_and_ONNX": "[batch_size, channels, num_frames, height, width]",
        "PyTorch_original": "[batch_size, num_frames, channels, height, width]",
        "note": "Es necesario permuter las dimensiones según el formato elegido"
    }
}

# Guardar configuración
inference_config_path = os.path.join(exports_dir, "inference_config.json")
with open(inference_config_path, 'w') as f:
    json.dump(inference_config, f, indent=4)

print(f"✓ Configuración guardada: {inference_config_path}")

# 10. Crear script de ejemplo para inferencia
print("\n10. Creando script de ejemplo para inferencia...")
example_script = """import torch
import cv2
import numpy as np
from pathlib import Path
from transformers import AutoImageProcessor

# Configuración
CONFIG = {
    "model_path": "MODEL_PATH",  # Reemplazar con la ruta al modelo exportado
    "processor_path": "PROCESSOR_PATH",  # Ruta al procesador
    "num_frames": 8,
    "image_size": 224,
    "threshold": 0.70,
    "device": "cuda" if torch.cuda.is_available() else "cpu"
}

# Determinar tipo de modelo
is_torchscript = CONFIG["model_path"].endswith(".pt") and "scripted" in CONFIG["model_path"]
is_onnx = CONFIG["model_path"].endswith(".onnx")

# Cargar procesador
processor = AutoImageProcessor.from_pretrained(CONFIG["processor_path"])

# Cargar modelo según su tipo
if is_torchscript:
    model = torch.jit.load(CONFIG["model_path"]).to(CONFIG["device"])
    model.eval()
elif is_onnx:
    import onnxruntime as ort
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if torch.cuda.is_available() else ['CPUExecutionProvider']
    model = ort.InferenceSession(CONFIG["model_path"], providers=providers)
else:
    # Modelo PyTorch estándar
    model = torch.load(CONFIG["model_path"], map_location=CONFIG["device"])
    model.eval()

def extract_frames(video_path, num_frames):
    ""Extrae frames uniformemente distribuidos de un video""
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calcular índices de frames a extraer
    indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
    frames = []

    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if ret:
            # Convertir de BGR a RGB
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Redimensionar
            frame = cv2.resize(frame, (CONFIG["image_size"], CONFIG["image_size"]))
            frames.append(frame)

    cap.release()
    return frames

def predict_violence(video_path):
    ""Predice si hay violencia en un video""
    # Extraer frames
    frames = extract_frames(video_path, CONFIG["num_frames"])
    if len(frames) != CONFIG["num_frames"]:
        raise ValueError(f"No se pudieron extraer {CONFIG['num_frames']} frames del video")

    # Preprocesar frames según el tipo de modelo
    if is_torchscript or is_onnx:
        # Para TorchScript/ONNX (espera [batch, C, T, H, W])
        # Crear tensor [B, C, T, H, W]
        tensor_input = np.array(frames).transpose(3, 0, 1, 2) / 255.0  # [C, T, H, W]
        tensor_input = np.expand_dims(tensor_input, 0)  # [B, C, T, H, W]
        tensor_input = torch.from_numpy(tensor_input).float()

        if is_torchscript:
            tensor_input = tensor_input.to(CONFIG["device"])
            if "half" in CONFIG["model_path"]:
                tensor_input = tensor_input.half()

            # Inferencia
            with torch.no_grad():
                outputs = model(tensor_input)
                probs = outputs.cpu().numpy()[0]
        else:  # ONNX
            # Ejecutar inferencia ONNX
            ort_inputs = {model.get_inputs()[0].name: tensor_input.cpu().numpy()}
            outputs = model.run(None, ort_inputs)
            probs = outputs[0][0]
    else:
        # Para modelo PyTorch normal (espera [B, T, C, H, W])
        inputs = processor(frames, return_tensors="pt")
        pixel_values = inputs["pixel_values"].to(CONFIG["device"])

        # Inferencia
        with torch.no_grad():
            outputs = model(pixel_values=pixel_values)
            probs = outputs[0].cpu().numpy()

    # Procesar resultado
    violence_prob = probs[1]  # Probabilidad de clase "violencia"
    is_violence = violence_prob >= CONFIG["threshold"]

    return {
        "is_violence": bool(is_violence),
        "violence_probability": float(violence_prob),
        "no_violence_probability": float(probs[0])
    }

# Ejemplo de uso
if __name__ == "__main__":
    video_path = "ruta/a/tu/video.mp4"  # Reemplazar con ruta a un video
    result = predict_violence(video_path)
    print(f"Predicción: {'VIOLENCIA' if result['is_violence'] else 'NO VIOLENCIA'}")
    print(f"Probabilidad de violencia: {result['violence_probability']:.4f}")
    print(f"Probabilidad de no violencia: {result['no_violence_probability']:.4f}")
"""

# Guardar script de ejemplo
example_script_path = os.path.join(exports_dir, "inference_example.py")
with open(example_script_path, 'w') as f:
    f.write(example_script)

print(f"✓ Script de ejemplo guardado: {example_script_path}")

# Resumen final
print("\n=== RESUMEN DE EXPORTACIÓN ===")
print(f"Directorio de modelos exportados: {exports_dir}")
print("Formatos disponibles:")
for format_name, filename in inference_config["formats_available"].items():
    if filename:
        format_path = os.path.join(exports_dir, filename)
        size_mb = os.path.getsize(format_path) / (1024*1024) if os.path.exists(format_path) else 0
        print(f"- {format_name}: {filename} ({size_mb:.2f} MB)")
print("\nPróximos pasos:")
print("1. Selecciona el formato más adecuado para tu despliegue")
print("2. Adapta el script de ejemplo (inference_example.py) para tu aplicación web")
print("3. Para React: Configura un servidor backend que utilice el modelo o explora ONNX Runtime Web")

Usando dispositivo: cuda
=== EXPORTACIÓN DEL MODELO TIMESFORMER PARA DETECCIÓN DE VIOLENCIA ===
Cargando modelo entrenado desde: /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/timesformer_violence_detector_best_ft2.pt


Some weights of TimesformerForVideoClassification were not initialized from the model checkpoint at facebook/timesformer-base-finetuned-k400 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([400, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([400]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Modelo cargado exitosamente. F1-Score: 0.9607708189951824

1. Guardando modelo PyTorch original...
✓ Modelo original guardado: /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/exported_models/timesformer_violence_detector_original.pt
  Tamaño: 462.68 MB

2. Creando modelo optimizado para inferencia...
✓ Modelo optimizado guardado: /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/exported_models/timesformer_violence_detector_inference.pt
  Tamaño: 462.73 MB

3. Exportando modelo en precisión FP16...
✓ Modelo FP16 guardado: /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/exported_models/timesformer_violence_detector_fp16.pt
  Tamaño: 231.44 MB

4. Exportando a TorchScript (opción half precision)...
✓ Modelo TorchScript (half) guardado: /content/drive/MyDrive/TrabajoProyecto_IA3/modelo_timesformer/exported_models/timesformer_violence_detector_scripted_half.pt
  Tamaño: 231.39 MB

5. Exportando a TorchScript (precisión original)...
✓ Modelo TorchScri

In [None]:
model

TimesformerForVideoClassification(
  (timesformer): TimesformerModel(
    (embeddings): TimesformerEmbeddings(
      (patch_embeddings): TimesformerPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (time_drop): Dropout(p=0.0, inplace=False)
    )
    (encoder): TimesformerEncoder(
      (layer): ModuleList(
        (0-11): 12 x TimesformerLayer(
          (drop_path): Identity()
          (attention): TimeSformerAttention(
            (attention): TimesformerSelfAttention(
              (qkv): Linear(in_features=768, out_features=2304, bias=True)
              (attn_drop): Dropout(p=0.0, inplace=False)
            )
            (output): TimesformerSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0, inplace=False)
            )
          )
          (intermediate): TimesformerIntermediate(
            (dense):

In [None]:
# ============================== PRUEBA DE INFERENCIA ==============================

logger.info("Realizando pruebas de inferencia en muestras")
print("Realizando pruebas de inferencia en muestras")

# Comprobar si ya tenemos el modelo cargado
try:
    # Intentar acceder al modelo
    model
    # Asegurarse de que tiene cargado el mejor modelo de fine-tuning
    best_ft_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft2.pt")
    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
except NameError:
    # Si el modelo no está definido, crear uno nuevo y cargarlo
    logger.info("Cargando modelo desde checkpoint")
    print("Cargando modelo desde checkpoint")
    model = TimesformerForVideoClassification.from_pretrained(
        CONFIG["pretrained_model"],
        num_frames=CONFIG["num_frames"],
        image_size=CONFIG["image_size"],
    )

    # Adaptarlo a nuestra tarea
    if model.classifier.out_features != CONFIG["num_classes"]:
        model.classifier = nn.Sequential(
            nn.Dropout(CONFIG["tl_dropout"]),
            nn.Linear(model.classifier.in_features, CONFIG["num_classes"])
        )

    best_ft_model_path = os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_best_ft2.pt")
    checkpoint = torch.load(best_ft_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])

# Asegurar que el modelo está en el dispositivo correcto
model.to(device)
model.eval()

# Cargar dataset de prueba
test_dataset = ViolenceVideoDataset(
    root_dir=CONFIG["dataset_path"],
    split='test',
    num_frames=CONFIG["num_frames"],
    image_size=CONFIG["image_size"]
)

# Seleccionar algunas muestras aleatorias
num_samples = min(5, len(test_dataset))
sample_indices = random.sample(range(len(test_dataset)), num_samples)

# Resultados
results = []

# Crear figura para visualización
fig, axes = plt.subplots(num_samples, 2, figsize=(12, 4*num_samples))
if num_samples == 1:
    axes = axes.reshape(1, 2)

for i, idx in enumerate(sample_indices):
    try:
        # Obtener muestra
        sample = test_dataset[idx]
        pixel_values = sample['pixel_values'].unsqueeze(0).to(device)  # Añadir dimensión de batch
        label = sample['labels'].item()
        video_path = sample['video_path']

        # Inferencia
        with torch.no_grad():
            outputs = model(pixel_values=pixel_values)
            logits = outputs.logits
            probs = torch.softmax(logits, dim=1)
            violence_prob = probs[0, 1].item()
            prediction = violence_prob >= CONFIG["threshold"]

        # Extraer un frame para visualización
        video_reader = VideoReader(video_path, ctx=cpu(0))
        mid_frame_idx = len(video_reader) // 2
        frame = video_reader[mid_frame_idx].asnumpy()

        # Guardar resultado
        results.append({
            'video_path': video_path,
            'true_label': label,
            'violence_prob': violence_prob,
            'prediction': prediction,
            'correct': (prediction == label)
        })

        # Visualizar
        axes[i, 0].imshow(frame)
        axes[i, 0].set_title(f"Video: {os.path.basename(video_path)}")
        axes[i, 0].axis('off')

        # Graficar probabilidad
        bar_colors = ['green', 'red']
        class_names = ['No Violencia', 'Violencia']
        class_probs = [1 - violence_prob, violence_prob]

        axes[i, 1].barh(class_names, class_probs, color=bar_colors)
        axes[i, 1].set_xlim(0, 1)
        axes[i, 1].set_title(f"Predicción: {'Violencia' if prediction else 'No Violencia'} " +
                          f"(Real: {'Violencia' if label else 'No Violencia'})")
        axes[i, 1].axvline(x=CONFIG["threshold"], color='black', linestyle='--',
                      label=f'Umbral: {CONFIG["threshold"]:.2f}')
        axes[i, 1].legend()

    except Exception as e:
        logger.error(f"Error al procesar muestra {idx}: {str(e)}")
        # En caso de error, dejar la posición vacía
        axes[i, 0].axis('off')
        axes[i, 1].axis('off')
        continue

plt.tight_layout()
plt.savefig(os.path.join(CONFIG["output_dir"], "inference_samples.png"))
plt.close()

# Guardar resultados
if results:
    results_df = pd.DataFrame(results)
    results_df.to_csv(os.path.join(CONFIG["output_dir"], "inference_samples_results.csv"), index=False)

    # Mostrar resumen
    correct_count = sum(1 for r in results if r['correct'])
    logger.info(f"Precisión en muestras de prueba: {correct_count}/{len(results)} ({100 * correct_count / len(results):.1f}%)")
    print(f"Precisión en muestras de prueba: {correct_count}/{len(results)} ({100 * correct_count / len(results):.1f}%)")
else:
    logger.warning("No se pudieron procesar muestras para pruebas de inferencia")

Realizando pruebas de inferencia en muestras
Cargados 800 videos para split 'test'
Violencia: 400, No Violencia: 400
Precisión en muestras de prueba: 5/5 (100.0%)


In [None]:
# ============================== RESUMEN FINAL DE MÉTRICAS ==============================

logger.info("=== RESUMEN FINAL DE MÉTRICAS ===")

# Intentar cargar informes de evaluación
try:
    # Transfer Learning
    tl_report_path = os.path.join(CONFIG["output_dir"], "evaluation_report_transfer_learning.json")
    with open(tl_report_path, 'r') as f:
        tl_report = json.load(f)

    # Fine-Tuning
    ft_report_path = os.path.join(CONFIG["output_dir"], "evaluation_report_fine_tuning.json")
    with open(ft_report_path, 'r') as f:
        ft_report = json.load(f)

    # Test
    test_report_path = os.path.join(CONFIG["output_dir"], "evaluation_report_test.json")
    with open(test_report_path, 'r') as f:
        test_report = json.load(f)

    # Mostrar métricas
    logger.info("Métricas en Transfer Learning (validación):")
    logger.info(f"  - Accuracy: {tl_report['metrics']['accuracy']:.4f}")
    logger.info(f"  - Precision: {tl_report['metrics']['precision']:.4f}")
    logger.info(f"  - Recall (Sensibilidad): {tl_report['metrics']['recall']:.4f}")
    logger.info(f"  - Specificity: {tl_report['metrics']['specificity']:.4f}")
    logger.info(f"  - F1-Score: {tl_report['metrics']['f1_score']:.4f}")
    logger.info(f"  - ROC AUC: {tl_report['metrics']['roc_auc']:.4f}")

    logger.info("Métricas en Fine-Tuning (validación):")
    logger.info(f"  - Accuracy: {ft_report['metrics']['accuracy']:.4f}")
    logger.info(f"  - Precision: {ft_report['metrics']['precision']:.4f}")
    logger.info(f"  - Recall (Sensibilidad): {ft_report['metrics']['recall']:.4f}")
    logger.info(f"  - Specificity: {ft_report['metrics']['specificity']:.4f}")
    logger.info(f"  - F1-Score: {ft_report['metrics']['f1_score']:.4f}")
    logger.info(f"  - ROC AUC: {ft_report['metrics']['roc_auc']:.4f}")

    logger.info("Métricas en Test (final):")
    logger.info(f"  - Accuracy: {test_report['metrics']['accuracy']:.4f}")
    logger.info(f"  - Precision: {test_report['metrics']['precision']:.4f}")
    logger.info(f"  - Recall (Sensibilidad): {test_report['metrics']['recall']:.4f}")
    logger.info(f"  - Specificity: {test_report['metrics']['specificity']:.4f}")
    logger.info(f"  - F1-Score: {test_report['metrics']['f1_score']:.4f}")
    logger.info(f"  - ROC AUC: {test_report['metrics']['roc_auc']:.4f}")

    # Crear tabla comparativa
    metrics = ['accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'roc_auc']
    data = {
        'Métrica': metrics,
        'Transfer Learning': [tl_report['metrics'][m] for m in metrics],
        'Fine-Tuning': [ft_report['metrics'][m] for m in metrics],
        'Test': [test_report['metrics'][m] for m in metrics]
    }

    df = pd.DataFrame(data)

    # Formatear para mostrar resultados
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 120)
    pd.set_option('display.precision', 4)

    print("\n=== TABLA COMPARATIVA DE MÉTRICAS ===")
    print(df)

    # Guardar tabla
    df.to_csv(os.path.join(CONFIG["output_dir"], "metrics_comparison.csv"), index=False)

except Exception as e:
    logger.error(f"Error al cargar informes de evaluación: {str(e)}")
    logger.info("Asegúrate de que las fases de Transfer Learning, Fine-Tuning y Test ya se han ejecutado.")

# Mostrar información sobre el modelo final
try:
    # Cargar información del benchmark
    benchmark_path = os.path.join(CONFIG["output_dir"], "benchmark_results.json")
    with open(benchmark_path, 'r') as f:
        benchmark = json.load(f)

    logger.info("\nRendimiento del modelo:")
    logger.info(f"  - Tiempo por inferencia: {benchmark['avg_time_per_inference_ms']:.2f} ms")
    logger.info(f"  - Frames por segundo: {benchmark['fps']:.2f} FPS")

    # Cargar umbrales óptimos
    thresholds_path = os.path.join(CONFIG["output_dir"], "optimal_thresholds.json")
    with open(thresholds_path, 'r') as f:
        thresholds = json.load(f)

    logger.info("\nUmbrales óptimos:")
    logger.info(f"  - Umbral óptimo según F1: {thresholds['f1_optimal']:.4f}")
    logger.info(f"  - Umbral óptimo según ROC: {thresholds['roc_optimal']:.4f}")

except Exception as e:
    logger.error(f"Error al cargar información de rendimiento: {str(e)}")

# Mostrar rutas de los modelos exportados
try:
    model_paths = {
        'Modelo PyTorch': os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_final.pt"),
        'Modelo de Inferencia': os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_inference.pt"),
        'Modelo Hugging Face': os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}_hf"),
        'Modelo ONNX': os.path.join(CONFIG["output_dir"], f"{CONFIG['model_name']}.onnx"),
        'Script de Inferencia': os.path.join(CONFIG["output_dir"], "inference_example.py")
    }

    logger.info("\nModelos exportados:")
    for name, path in model_paths.items():
        exists = "✓" if os.path.exists(path) else "✗"
        logger.info(f"  - {name}: {path} {exists}")

except Exception as e:
    logger.error(f"Error al verificar rutas de modelos: {str(e)}")

logger.info("\n¡Entrenamiento y evaluación del modelo TimeSformer para detección de violencia completados!")

# CONCLUSIONES