In [None]:
!pip install tqdm
!pip install pytorch-lightning
!pip install torchmetrics
!pip install transformers
!pip install opencv-python
!pip install onnx

In [1]:
# Instalar dependencias
!pip install torch torchvision pytorch-lightning transformers opencv-python-headless matplotlib seaborn tqdm tensorboard onnx

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.5.1.post0-py3-none-any.whl.metadata (20 kB)
Collecting onnx
  Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nv

In [2]:
# ===================== CONFIGURACIÓN INICIAL =====================
import os
import sys
import glob
import time
import json
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from tqdm.auto import tqdm
from datetime import datetime
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import LambdaLR
from torchvision import transforms
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    precision_recall_curve,
    roc_curve,
    roc_auc_score,
    average_precision_score,
    accuracy_score,
    recall_score,
    precision_score,
    f1_score
)
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl
from pytorch_lightning.callbacks import (
    ModelCheckpoint,
    EarlyStopping,
    LearningRateMonitor,
    TQDMProgressBar
)
from pytorch_lightning.loggers import TensorBoardLogger
import torchmetrics
from torchmetrics import (
    Accuracy,
    Precision,
    Recall,
    F1Score,
    AUROC,
    ConfusionMatrix,
    Specificity,
    MetricCollection # Added this import
)
from transformers import (
    AutoImageProcessor,
    TimesformerForVideoClassification,
    TimesformerConfig
)
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import gc
import warnings
import onnx
import onnx.checker
import hashlib
warnings.filterwarnings('ignore')

In [3]:
# Montar Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Verificar espacio en Google Drive
drive_info = !df -h /content/drive
print("\nEspacio en Google Drive:")
for line in drive_info:
    if '/content/drive' in line:
        print(line)


Espacio en Google Drive:
drive            15G  3.4G   12G  23% /content/drive


In [4]:
# Verificar disponibilidad de GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Utilizando dispositivo: {device}")
print(f"PyTorch versión: {torch.__version__}")
print(f"CUDA disponible: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA versión: {torch.version.cuda}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memoria GPU total: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")


# Configurar semillas para reproducibilidad
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    pl.seed_everything(seed)

seed_everything(42)

INFO:lightning_fabric.utilities.seed:Seed set to 42


Utilizando dispositivo: cuda
PyTorch versión: 2.6.0+cu124
CUDA disponible: True
CUDA versión: 12.4
GPU: Tesla T4
Memoria GPU total: 15.83 GB


In [5]:
# ===================== CONFIGURACIÓN DE DIRECTORIOS =====================
# Rutas principales
DRIVE_PATH = '/content/drive/MyDrive'
DATASET_PATH = f'{DRIVE_PATH}/dataset_violencia'
OUTPUT_PATH = f'{DRIVE_PATH}/TrabajoProyecto_IA3/timesformer_training'
CHECKPOINTS_PATH = f'{OUTPUT_PATH}/checkpoints'
LOGS_PATH = f'{OUTPUT_PATH}/logs'
RESULTS_PATH = f'{OUTPUT_PATH}/results'
PLOTS_PATH = f'{RESULTS_PATH}/plots'

# Crear directorios de salida
for path in [OUTPUT_PATH, CHECKPOINTS_PATH, LOGS_PATH, RESULTS_PATH, PLOTS_PATH]:
    os.makedirs(path, exist_ok=True)
    # Verificar permisos de escritura
    if not os.access(path, os.W_OK):
        raise PermissionError(f"No se puede escribir en {path}")

print(f"Estructura de directorios creada en: {OUTPUT_PATH}")

Estructura de directorios creada en: /content/drive/MyDrive/TrabajoProyecto_IA3/timesformer_training


In [6]:
# Configurar logging general
import logging
logging.basicConfig(
    filename=f"{OUTPUT_PATH}/training.log",
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger()
logger.info("Iniciando configuración del entrenamiento")

# Configuración general del entrenamiento
CONFIG = {
    'model_type': 'facebook/timesformer-base-finetuned-k400',
    'num_frames': 8,
    'image_size': 224,
    'batch_size': 8,
    'batch_size_finetune': 6,  # Aumentado para A100
    'num_workers': 8,  # Aumentado para A100
    'epochs_transfer': 20,
    'epochs_finetune': 10,
    'patience': 5,
    'classes': ['no_violence', 'violence'],
    'learning_rate_transfer': 5e-5,
    'learning_rate_finetune': 1e-5,
    'weight_decay': 0.01,
    'save_every_n_epochs': 5,
    'warmup_steps': 100
}

In [7]:
# ===================== VERIFICACIÓN Y ANÁLISIS DEL DATASET =====================
logger.info("Analizando el dataset")
print("\n=== ANÁLISIS DEL DATASET ===")
print(f"Ruta del dataset: {DATASET_PATH}")

def count_videos(directory, extension='.mp4'):
    if not os.path.exists(directory):
        return 0
    videos = [f for f in os.listdir(directory) if f.endswith(extension)]
    # Verificar integridad
    valid_videos = 0
    for video in videos:
        cap = cv2.VideoCapture(os.path.join(directory, video))
        if cap.isOpened() and cap.get(cv2.CAP_PROP_FRAME_COUNT) > 0:
            valid_videos += 1
        else:
            logger.warning(f"Video inválido detectado: {os.path.join(directory, video)}")
        cap.release()
    return valid_videos

# Verificar estructura del dataset
train_noviolence_dir = f"{DATASET_PATH}/train/no_violence"
train_violence_dir = f"{DATASET_PATH}/train/violence"
val_noviolence_dir = f"{DATASET_PATH}/val/no_violence"
val_violence_dir = f"{DATASET_PATH}/val/violence"
test_noviolence_dir = f"{DATASET_PATH}/test/no_violence"
test_violence_dir = f"{DATASET_PATH}/test/violence"

train_noviolence_count = count_videos(train_noviolence_dir)
train_violence_count = count_videos(train_violence_dir)
val_noviolence_count = count_videos(val_noviolence_dir)
val_violence_count = count_videos(val_violence_dir)
test_noviolence_count = count_videos(test_noviolence_dir)
test_violence_count = count_videos(test_violence_dir)

print("--- Conteo de Videos ---")
print(f"Train - No Violencia: {train_noviolence_count}")
print(f"Train - Violencia: {train_violence_count}")
print(f"Validación - No Violencia: {val_noviolence_count}")
print(f"Validación - Violencia: {val_violence_count}")
print(f"Test - No Violencia: {test_noviolence_count}")
print(f"Test - Violencia: {test_violence_count}")

total_train = train_noviolence_count + train_violence_count
total_val = val_noviolence_count + val_violence_count
total_test = test_noviolence_count + test_violence_count
total_videos = total_train + total_val + total_test

train_pct = total_train / total_videos * 100
val_pct = total_val / total_videos * 100
test_pct = total_test / total_videos * 100

print("\n--- Resumen del Dataset ---")
print(f"Total videos: {total_videos}")
print(f"Split de Train: {total_train} videos ({train_pct:.1f}%)")
print(f"Split de Validación: {total_val} videos ({val_pct:.1f}%)")
print(f"Split de Test: {total_test} videos ({test_pct:.1f}%)")

train_balance = train_violence_count / total_train * 100
val_balance = val_violence_count / total_val * 100
test_balance = test_violence_count / total_test * 100

print("\n--- Balance de Clases (% Violencia) ---")
print(f"Train: {train_balance:.1f}% violencia")
print(f"Validación: {val_balance:.1f}% violencia")
print(f"Test: {test_balance:.1f}% violencia")

# Guardar análisis del dataset
dataset_summary = {
    'total_videos': total_videos,
    'train': {'count': total_train, 'percent': train_pct, 'violence_percent': train_balance},
    'val': {'count': total_val, 'percent': val_pct, 'violence_percent': val_balance},
    'test': {'count': total_test, 'percent': test_pct, 'violence_percent': test_balance}
}
with open(f"{RESULTS_PATH}/dataset_summary.json", 'w') as f:
    json.dump(dataset_summary, f, indent=4)

plt.figure(figsize=(15, 5))
plt.subplot(1, 2, 1)
sizes = [total_train, total_val, total_test]
labels = [f'Train\n{total_train} videos\n({train_pct:.1f}%)',
          f'Val\n{total_val} videos\n({val_pct:.1f}%)',
          f'Test\n{total_test} videos\n({test_pct:.1f}%)']
plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90,
        colors=['#4CAF50', '#2196F3', '#FF9800'])
plt.axis('equal')
plt.title('Distribución de Videos por Split')

plt.subplot(1, 2, 2)
no_violence_count = train_noviolence_count + val_noviolence_count + test_noviolence_count
violence_count = train_violence_count + val_violence_count + test_violence_count
class_sizes = [no_violence_count, violence_count]
class_labels = [f'No Violencia\n{no_violence_count} videos\n({no_violence_count/total_videos*100:.1f}%)',
                f'Violencia\n{violence_count} videos\n({violence_count/total_videos*100:.1f}%)']
plt.pie(class_sizes, labels=class_labels, autopct='%1.1f%%', startangle=90,
        colors=['#2196F3', '#F44336'])
plt.axis('equal')
plt.title('Distribución de Videos por Clase')

plt.tight_layout()
plt.savefig(f"{PLOTS_PATH}/dataset_distribution.png")
plt.close()


=== ANÁLISIS DEL DATASET ===
Ruta del dataset: /content/drive/MyDrive/dataset_violencia




--- Conteo de Videos ---
Train - No Violencia: 3999
Train - Violencia: 3998
Validación - No Violencia: 750
Validación - Violencia: 750
Test - No Violencia: 400
Test - Violencia: 400

--- Resumen del Dataset ---
Total videos: 10297
Split de Train: 7997 videos (77.7%)
Split de Validación: 1500 videos (14.6%)
Split de Test: 800 videos (7.8%)

--- Balance de Clases (% Violencia) ---
Train: 50.0% violencia
Validación: 50.0% violencia
Test: 50.0% violencia


In [8]:
# ===================== PREPARACIÓN DEL DATASET Y DATALOADERS =====================
class VideoDataset(Dataset):
    def __init__(self, base_dir, split, num_frames=8):
        self.base_dir = base_dir
        self.split = split
        self.num_frames = num_frames
        self.samples = []
        self.class_map = {'no_violence': 0, 'violence': 1}
        self.metadata_cache_path = f"{OUTPUT_PATH}/video_metadata.json"
        self.metadata_cache = self.load_metadata_cache()

        for class_name in self.class_map.keys():
            class_dir = os.path.join(base_dir, split, class_name)
            if not os.path.exists(class_dir):
                continue
            for video_file in glob.glob(os.path.join(class_dir, '*.mp4')):
                self.samples.append({
                    'path': video_file,
                    'label': self.class_map[class_name]
                })

        if len(self.samples) == 0:
            raise RuntimeError(f"No se encontraron videos en {base_dir}/{split}")

        print(f"Cargados {len(self.samples)} videos para el split '{split}'")
        logger.info(f"Cargados {len(self.samples)} videos para el split '{split}'")

        self.processor = AutoImageProcessor.from_pretrained(
            'facebook/timesformer-base-finetuned-k400',
            do_rescale=True,
            rescale_factor=1/255.0,
            do_resize=True,
            size={'height': 224, 'width': 224},
            do_normalize=True,
            image_mean=[0.485, 0.456, 0.406],
            image_std=[0.229, 0.224, 0.225]
        )

        # Guardar el caché inicial después de cargar los videos
        self.save_metadata_cache()

    def load_metadata_cache(self):
        if os.path.exists(self.metadata_cache_path):
            try:
                with open(self.metadata_cache_path, 'r') as f:
                    return json.load(f)
            except Exception as e:
                logger.warning(f"Error al cargar metadata cache: {e}")
                return {}
        return {}

    def save_metadata_cache(self):
        # Solo el proceso principal debería guardar el caché
        if torch.utils.data.get_worker_info() is not None:
            return  # Evitar que los workers escriban al archivo
        try:
            with open(self.metadata_cache_path, 'w') as f:
                json.dump(self.metadata_cache, f, indent=4)
            logger.info(f"Metadata cache guardado en {self.metadata_cache_path}")
        except Exception as e:
            logger.warning(f"Error al guardar metadata cache: {e}")

    def get_file_hash(self, file_path):
        hasher = hashlib.md5()
        try:
            with open(file_path, 'rb') as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    hasher.update(chunk)
        except Exception as e:
            logger.warning(f"Error al calcular hash de {file_path}: {e}")
            return ""
        return hasher.hexdigest()

    def __len__(self):
        return len(self.samples)

    def load_video(self, video_path):
        frames = []
        video_hash = self.get_file_hash(video_path)

        # Verificar caché
        if video_path in self.metadata_cache and self.metadata_cache[video_path]['hash'] == video_hash:
            metadata = self.metadata_cache[video_path]
            if not metadata['valid']:
                logger.warning(f"Video en caché inválido: {video_path}")
                frames = [np.zeros((224, 224, 3), dtype=np.uint8) for _ in range(self.num_frames)]
                return frames
            duration = metadata['duration']
            fps = metadata['fps']
        else:
            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    raise ValueError(f"No se pudo abrir el video: {video_path}")
                fps = cap.get(cv2.CAP_PROP_FPS)
                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                duration = total_frames / fps if fps > 0 else 0
                if not (2 <= duration <= 9.0):  # Manteniendo el rango original
                    print(f"Advertencia: Video {video_path} tiene duración {duration:.2f}s (esperado 2-9s)")
                    logger.warning(f"Video {video_path} tiene duración {duration:.2f}s (esperado 2-9s)")
                if not (10 <= fps <= 20):
                    print(f"Advertencia: Video {video_path} tiene {fps:.2f} FPS (esperado ~15 FPS)")
                    logger.warning(f"Video {video_path} tiene {fps:.2f} FPS (esperado ~15 FPS)")
                # Actualizar caché en memoria
                self.metadata_cache[video_path] = {
                    'hash': video_hash,
                    'duration': duration,
                    'fps': fps,
                    'valid': True
                }
            except Exception as e:
                print(f"Error cargando video {video_path}: {e}")
                logger.error(f"Error cargando video {video_path}: {e}")
                with open(f"{OUTPUT_PATH}/video_errors.log", 'a') as f:
                    f.write(f"{video_path}: {str(e)}\n")
                self.metadata_cache[video_path] = {
                    'hash': video_hash,
                    'duration': 0,
                    'fps': 0,
                    'valid': False
                }
                frames = [np.zeros((224, 224, 3), dtype=np.uint8) for _ in range(self.num_frames)]
                return frames
            finally:
                cap.release()

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                raise ValueError(f"No se pudo abrir el video: {video_path}")
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            indices = np.linspace(0, total_frames - 1, self.num_frames, dtype=int) if total_frames > self.num_frames else list(range(total_frames))
            for i in indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, i)
                ret, frame = cap.read()
                if ret:
                    height, width = frame.shape[:2]
                    if height < 100 or width < 100 or abs(width/height - 1) > 0.5:
                        logger.warning(f"Frame de tamaño inusual en {video_path}: {width}x{height}")
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frame = cv2.resize(frame, (224, 224))
                    frames.append(frame)
                else:
                    frames.append(frames[-1] if frames else np.zeros((224, 224, 3), dtype=np.uint8))
        except Exception as e:
            print(f"Error cargando video {video_path}: {e}")
            logger.error(f"Error cargando video {video_path}: {e}")
            with open(f"{OUTPUT_PATH}/video_errors.log", 'a') as f:
                f.write(f"{video_path}: {str(e)}\n")
            frames = [np.zeros((224, 224, 3), dtype=np.uint8) for _ in range(self.num_frames)]
        finally:
            cap.release()

        while len(frames) < self.num_frames:
            frames.append(frames[-1] if frames else np.zeros((224, 224, 3), dtype=np.uint8))
        return frames

    def __getitem__(self, idx):
        sample = self.samples[idx]
        video_path = sample['path']
        label = sample['label']
        frames = self.load_video(video_path)
        inputs = self.processor(images=frames, return_tensors="pt")
        video_tensor = inputs['pixel_values'].squeeze(0)
        return {
            'video': video_tensor,
            'label': torch.tensor(label, dtype=torch.long),
            'path': video_path
        }

try:
    train_dataset = VideoDataset(DATASET_PATH, 'train', num_frames=CONFIG['num_frames'])
    val_dataset = VideoDataset(DATASET_PATH, 'val', num_frames=CONFIG['num_frames'])
    test_dataset = VideoDataset(DATASET_PATH, 'test', num_frames=CONFIG['num_frames'])

    train_loader = DataLoader(
        train_dataset,
        batch_size=CONFIG['batch_size'],
        shuffle=True,
        num_workers=CONFIG['num_workers'],
        pin_memory=True,
        drop_last=True
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=CONFIG['batch_size'],
        shuffle=False,
        num_workers=CONFIG['num_workers'],
        pin_memory=True
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=CONFIG['batch_size'],
        shuffle=False,
        num_workers=CONFIG['num_workers'],
        pin_memory=True
    )

    print(f"Tamaño del batch de entrenamiento: {CONFIG['batch_size']}")
    print(f"Número de batches de entrenamiento: {len(train_loader)}")
    print(f"Número de batches de validación: {len(val_loader)}")
    print(f"Número de batches de test: {len(test_loader)}")
    logger.info(f"Dataloaders creados: train={len(train_loader)}, val={len(val_loader)}, test={len(test_loader)}")

    # Guardar metadata cache después de inicializar datasets
    train_dataset.save_metadata_cache()
    val_dataset.save_metadata_cache()
    test_dataset.save_metadata_cache()

except Exception as e:
    print(f"Error al cargar los datasets: {e}")
    logger.error(f"Error al cargar los datasets: {e}")
    import traceback
    traceback.print_exc()
    print("Verificando existencia de directorios del dataset...")
    for split in ['train', 'val', 'test']:
        for cls in ['no_violence', 'violence']:
            path = os.path.join(DATASET_PATH, split, cls)
            print(f"Directorio {path} existe: {os.path.exists(path)}")


Cargados 7999 videos para el split 'train'


preprocessor_config.json:   0%|          | 0.00/412 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Cargados 1500 videos para el split 'val'
Cargados 800 videos para el split 'test'
Tamaño del batch de entrenamiento: 8
Número de batches de entrenamiento: 999
Número de batches de validación: 188
Número de batches de test: 100


In [9]:
# ===================== MODELO: TIMESFORMER =====================
class TimesformerModule(pl.LightningModule):
    def __init__(self, model_name, num_classes=2, learning_rate=5e-5, weight_decay=0.01,
                 num_frames=8, class_weights=None, freeze_backbone=True):
        super().__init__()
        self.save_hyperparameters()
        self.model_name = model_name
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.num_frames = num_frames
        self.freeze_backbone = freeze_backbone

        self.model = TimesformerForVideoClassification.from_pretrained(
            model_name,
            num_frames=num_frames,
            num_labels=num_classes,
            ignore_mismatched_sizes=True
        )

        if freeze_backbone:
            for name, param in self.model.named_parameters():
                if 'classifier' not in name:
                    param.requires_grad = False

        self.hidden_size = self.model.config.hidden_size

        if class_weights is not None:
            self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)
            self.criterion = nn.CrossEntropyLoss(weight=self.class_weights)
        else:
            self.criterion = nn.CrossEntropyLoss()

        self.train_metrics = MetricCollection({
            'accuracy': Accuracy(task='binary'),
            'precision': Precision(task='binary'),
            'recall': Recall(task='binary'),
            'f1': F1Score(task='binary'),
            'specificity': Specificity(task='binary'),
            'auroc': AUROC(task='binary')
        })

        self.val_metrics = MetricCollection({
            'accuracy': Accuracy(task='binary'),
            'precision': Precision(task='binary'),
            'recall': Recall(task='binary'),
            'f1': F1Score(task='binary'),
            'specificity': Specificity(task='binary'),
            'auroc': AUROC(task='binary')
        })

        self.test_metrics = MetricCollection({
            'accuracy': Accuracy(task='binary'),
            'precision': Precision(task='binary'),
            'recall': Recall(task='binary'),
            'f1': F1Score(task='binary'),
            'specificity': Specificity(task='binary'),
            'auroc': AUROC(task='binary')
        })

        self.confusion_matrix = ConfusionMatrix(task='binary')

    def forward(self, x):
        return self.model(pixel_values=x).logits

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(
            self.parameters(),
            lr=self.learning_rate,
            weight_decay=self.weight_decay
        )
        total_steps = self.trainer.estimated_stepping_batches
        scheduler = {
            'scheduler': LambdaLR(
                optimizer,
                lr_lambda=lambda step: min(1.0, step / CONFIG['warmup_steps']) if step < CONFIG['warmup_steps'] else 0.5 * (1.0 + math.cos(math.pi * (step - CONFIG['warmup_steps']) / (total_steps - CONFIG['warmup_steps'])))
            ),
            'interval': 'step',
            'frequency': 1
        }
        return [optimizer], [scheduler]

    def on_train_epoch_start(self):
        self.train_metrics.reset()

    def on_validation_epoch_start(self):
        self.val_metrics.reset()

    def on_test_epoch_start(self):
        self.test_metrics.reset()

    def training_step(self, batch, batch_idx):
        videos = batch['video'].to(device)
        labels = batch['label'].to(device)

        with autocast():
            logits = self(videos)
            loss = self.criterion(logits, labels)

        preds = torch.softmax(logits, dim=1)[:, 1]
        metrics = self.train_metrics(preds, labels)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log_dict({f'train_{k}': v for k, v in metrics.items()},
                      on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        videos = batch['video'].to(device)
        labels = batch['label'].to(device)

        with autocast():
            logits = self(videos)
            loss = self.criterion(logits, labels)

        preds = torch.softmax(logits, dim=1)[:, 1]
        metrics = self.val_metrics(preds, labels)
        self.confusion_matrix.update(preds, labels)
        self.log('val_loss', loss, prog_bar=True)
        self.log_dict({f'val_{k}': v for k, v in metrics.items()}, prog_bar=True)
        return {'loss': loss, 'preds': preds, 'labels': labels}

    def on_validation_epoch_end(self):
        conf_matrix = self.confusion_matrix.compute().cpu().numpy()
        self.confusion_matrix.reset()
        self.last_confusion_matrix = conf_matrix

    def test_step(self, batch, batch_idx):
        videos = batch['video'].to(device)
        labels = batch['label'].to(device)

        with autocast():
            logits = self(videos)
            loss = self.criterion(logits, labels)

        preds = torch.softmax(logits, dim=1)[:, 1]
        metrics = self.test_metrics(preds, labels)
        self.confusion_matrix.update(preds, labels)
        self.log('test_loss', loss)
        self.log_dict({f'test_{k}': v for k, v in metrics.items()})
        return {'loss': loss, 'preds': preds, 'labels': labels, 'paths': batch['path']}

    def predict_step(self, batch, batch_idx):
        videos = batch['video'].to(device)
        with autocast():
            logits = self(videos)
            preds = torch.softmax(logits, dim=1)
        return preds


In [10]:
# ===================== CALLBACKS Y LOGGERS =====================
checkpoint_callback = ModelCheckpoint(
    dirpath=CHECKPOINTS_PATH,
    filename='timesformer-{epoch:02d}-{val_loss:.4f}-{val_f1:.4f}',
    monitor='val_f1',
    mode='max',
    save_top_k=2,
    save_last=True,
    every_n_epochs=CONFIG['save_every_n_epochs']
)

early_stopping_callback = EarlyStopping(
    monitor='val_f1',
    mode='max',
    patience=CONFIG['patience'],
    verbose=True
)

lr_monitor = LearningRateMonitor(logging_interval='step')

class MetricsLogger(pl.Callback):
    def __init__(self, output_path):
        super().__init__()
        self.output_path = output_path
        self.metrics = []

    def on_validation_epoch_end(self, trainer, pl_module):
        metrics = {
            'epoch': trainer.current_epoch,
            **{k: v.item() for k, v in trainer.callback_metrics.items() if k.startswith('val_')}
        }
        self.metrics.append(metrics)
        pd.DataFrame(self.metrics).to_csv(f"{self.output_path}/epoch_metrics.csv", index=False)

metrics_logger = MetricsLogger(OUTPUT_PATH)

logger_tb = TensorBoardLogger(LOGS_PATH, name='timesformer')


# ENTRENAMIENTO DEL MODELO TRANSFER-LEARNING

In [None]:
# ===================== ENTRENAMIENTO: TRANSFER LEARNING =====================
logger.info("Iniciando Transfer Learning")
print("\n=== INICIANDO ENTRENAMIENTO: TRANSFER LEARNING ===")
print(f"Configuración de transfer learning:")
print(f"- Modelo base: {CONFIG['model_type']}")
print(f"- Tamaño de frames: {CONFIG['image_size']}x{CONFIG['image_size']}")
print(f"- Número de frames: {CONFIG['num_frames']}")
print(f"- Batch size: {CONFIG['batch_size']}")
print(f"- Épocas: {CONFIG['epochs_transfer']}")
print(f"- Learning rate: {CONFIG['learning_rate_transfer']}")

train_labels = [sample['label'] for sample in train_dataset.samples]
class_counts = np.bincount(train_labels)
class_weights = 1.0 / class_counts
class_weights = class_weights / np.sum(class_weights) * len(class_counts)
print(f"Pesos de clases para balance: {class_weights}")
logger.info(f"Pesos de clases: {class_weights}")

transfer_model = TimesformerModule(
    model_name=CONFIG['model_type'],
    num_classes=len(CONFIG['classes']),
    learning_rate=CONFIG['learning_rate_transfer'],
    weight_decay=CONFIG['weight_decay'],
    num_frames=CONFIG['num_frames'],
    class_weights=class_weights,
    freeze_backbone=True
).to(device)

transfer_trainer = pl.Trainer(
    max_epochs=CONFIG['epochs_transfer'],
    accelerator='gpu',
    devices=1,
    logger=logger_tb,
    callbacks=[checkpoint_callback, early_stopping_callback, lr_monitor, metrics_logger],
    log_every_n_steps=10,
    deterministic=True,
    precision=16
)

transfer_trainer.fit(
    transfer_model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader
)

transfer_model_path = f"{OUTPUT_PATH}/transfer_learning_final.pt"
torch.save(transfer_model.state_dict(), transfer_model_path)
print(f"Modelo de transfer learning guardado en: {transfer_model_path}")
logger.info(f"Modelo de transfer learning guardado en: {transfer_model_path}")

best_model_path = checkpoint_callback.best_model_path
print(f"Mejor modelo guardado en: {best_model_path}")
logger.info(f"Mejor modelo guardado en: {best_model_path}")



=== INICIANDO ENTRENAMIENTO: TRANSFER LEARNING ===
Configuración de transfer learning:
- Modelo base: facebook/timesformer-base-finetuned-k400
- Tamaño de frames: 224x224
- Número de frames: 8
- Batch size: 8
- Épocas: 20
- Learning rate: 5e-05
Pesos de clases para balance: [0.99987498 1.00012502]


config.json:   0%|          | 0.00/22.7k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/486M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/486M [00:00<?, ?B/s]

Some weights of TimesformerForVideoClassification were not initialized from the model checkpoint at facebook/timesformer-base-finetuned-k400 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([400, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([400]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.ra

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_f1 improved. New best score: 0.479


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_f1 improved by 0.003 >= min_delta = 0.0. New best score: 0.482


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [None]:
transfer_val_results = transfer_trainer.validate(transfer_model, val_loader)[0]
print("\n=== RESULTADOS DE VALIDACIÓN (TRANSFER LEARNING) ===")
for metric, value in transfer_val_results.items():
    print(f"{metric}: {value:.4f}")
logger.info(f"Resultados de validación (Transfer Learning): {transfer_val_results}")

conf_matrix = transfer_model.last_confusion_matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='g', cmap='Blues',
            xticklabels=CONFIG['classes'],
            yticklabels=CONFIG['classes'])
plt.xlabel('Predicción')
plt.ylabel('Ground Truth')
plt.title('Matriz de Confusión (Transfer Learning)')
plt.tight_layout()
plt.savefig(f"{PLOTS_PATH}/transfer_learning_confusion_matrix.png")
plt.close()

# ===================== LIMPIEZA =====================
logger.info("Liberando recursos")
print("\n=== LIMPIEZA Y LIBERACIÓN DE RECURSOS ===")
torch.cuda.empty_cache()
gc.collect()
if torch.cuda.is_available():
    print(f"Memoria GPU usada al finalizar: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    print(f"Memoria GPU reservada al finalizar: {torch.cuda.memory_reserved() / 1e9:.2f} GB")
    logger.info(f"Memoria GPU usada: {torch.cuda.memory_allocated() / 1e9:.2f} GB, reservada: {torch.cuda.memory_reserved() / 1e9:.2f} GB")

print("\n=== ENTRENAMIENTO COMPLETADO ===")
print(f"Todos los resultados y modelos guardados en: {OUTPUT_PATH}")
logger.info("Entrenamiento completado")

# ENTRENAMIENTO CON FINE-TUNING

In [None]:
# ===================== ENTRENAMIENTO: FINE-TUNING =====================
logger.info("Iniciando Fine-Tuning")
print("\n=== INICIANDO FINE-TUNING ===")
print(f"Configuración de fine-tuning:")
print(f"- Modelo base: Transfer Learning")
print(f"- Tamaño de frames: {CONFIG['image_size']}x{CONFIG['image_size']}")
print(f"- Número de frames: {CONFIG['num_frames']}")
print(f"- Batch size: {CONFIG['batch_size_finetune']}")
print(f"- Épocas: {CONFIG['epochs_finetune']}")
print(f"- Learning rate: {CONFIG['learning_rate_finetune']}")

if not os.path.exists(best_model_path):
    raise FileNotFoundError(f"No se encontró el checkpoint: {best_model_path}")

ft_model = TimesformerModule.load_from_checkpoint(
    best_model_path,
    model_name=CONFIG['model_type'],
    num_classes=len(CONFIG['classes']),
    learning_rate=CONFIG['learning_rate_finetune'],
    weight_decay=CONFIG['weight_decay'],
    num_frames=CONFIG['num_frames'],
    class_weights=class_weights,
    freeze_backbone=False
).to(device)

finetune_train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG['batch_size_finetune'],
    shuffle=True,
    num_workers=CONFIG['num_workers'],
    pin_memory=True,
    drop_last=True
)

finetune_val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG['batch_size_finetune'],
    shuffle=False,
    num_workers=CONFIG['num_workers'],
    pin_memory=True
)

finetune_checkpoint_callback = ModelCheckpoint(
    dirpath=CHECKPOINTS_PATH,
    filename='timesformer-finetune-{epoch:02d}-{val_loss:.4f}-{val_f1:.4f}',
    monitor='val_f1',
    mode='max',
    save_top_k=3,
    save_last=True,
    every_n_epochs=CONFIG['save_every_n_epochs']
)

finetune_logger = TensorBoardLogger(LOGS_PATH, name='timesformer_finetune')

finetune_trainer = pl.Trainer(
    max_epochs=CONFIG['epochs_finetune'],
    accelerator='gpu',
    devices=1,
    logger=finetune_logger,
    callbacks=[finetune_checkpoint_callback, early_stopping_callback, lr_monitor, metrics_logger],
    log_every_n_steps=10,
    deterministic=True,
    precision=16
)

finetune_trainer.fit(
    ft_model,
    train_dataloaders=finetune_train_loader,
    val_dataloaders=finetune_val_loader
)

finetune_model_path = f"{OUTPUT_PATH}/fine_tuning_final.pt"
torch.save(ft_model.state_dict(), finetune_model_path)
print(f"Modelo de fine-tuning guardado en: {finetune_model_path}")
logger.info(f"Modelo de fine-tuning guardado en: {finetune_model_path}")

best_finetune_model_path = finetune_checkpoint_callback.best_model_path
print(f"Mejor modelo de fine-tuning guardado en: {best_finetune_model_path}")
logger.info(f"Mejor modelo de fine-tuning guardado en: {best_finetune_model_path}")

finetune_val_results = finetune_trainer.validate(ft_model, finetune_val_loader)[0]
print("\n=== RESULTADOS DE VALIDACIÓN (FINE-TUNING) ===")
for metric, value in finetune_val_results.items():
    print(f"{metric}: {value:.4f}")
logger.info(f"Resultados de validación (Fine-Tuning): {finetune_val_results}")

finetune_conf_matrix = ft_model.last_confusion_matrix
plt.figure(figsize=(10, 8))
sns.heatmap(finetune_conf_matrix, annot=True, fmt='g', cmap='Blues',
            xticklabels=CONFIG['classes'],
            yticklabels=CONFIG['classes'])
plt.xlabel('Predicción')
plt.ylabel('Ground Truth')
plt.title('Matriz de Confusión (Fine-Tuning)')
plt.tight_layout()
plt.savefig(f"{PLOTS_PATH}/fine_tuning_confusion_matrix.png")
plt.close()

# EVALUACIÓN DEL MODELO

In [None]:
# ===================== EVALUACIÓN EN CONJUNTO DE PRUEBA =====================
logger.info("Evaluando en conjunto de prueba")
print("\n=== EVALUANDO MODELO EN CONJUNTO DE PRUEBA ===")

test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG['batch_size_finetune'],
    shuffle=False,
    num_workers=CONFIG['num_workers'],
    pin_memory=True
)

best_model = TimesformerModule.load_from_checkpoint(best_finetune_model_path).to(device)
best_model.eval()
best_model.freeze()

test_results = finetune_trainer.test(best_model, test_loader)[0]
print("\n=== RESULTADOS DE PRUEBA (MEJOR MODELO) ===")
for metric, value in test_results.items():
    print(f"{metric}: {value:.4f}")
logger.info(f"Resultados de prueba: {test_results}")

In [None]:
# ===================== ANÁLISIS DETALLADO DE RESULTADOS =====================
logger.info("Realizando análisis detallado de resultados")
print("\n=== ANÁLISIS DETALLADO DE RESULTADOS ===")

all_preds = []
all_labels = []
all_paths = []

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Analizando resultados"):
        videos = batch['video'].to(device)
        labels = batch['label'].to(device)
        paths = batch['path']
        with autocast():
            logits = best_model(videos)
            preds = torch.softmax(logits, dim=1)
        all_preds.append(preds.cpu())
        all_labels.append(labels.cpu())
        all_paths.extend(paths)

all_preds = torch.cat(all_preds, dim=0)
all_labels = torch.cat(all_labels, dim=0)
preds_np = all_preds.numpy()
labels_np = all_labels.numpy()
pred_classes = (preds_np[:, 1] >= 0.5).astype(int)  # Umbral por defecto

accuracy = accuracy_score(labels_np, pred_classes)
precision = precision_score(labels_np, pred_classes)
recall = recall_score(labels_np, pred_classes)
f1 = f1_score(labels_np, pred_classes)
conf_matrix = confusion_matrix(labels_np, pred_classes)
tn, fp, fn, tp = conf_matrix.ravel()
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
tpr = recall
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Especificidad: {specificity:.4f}")
print(f"True Positive Rate (TPR): {tpr:.4f}")
print(f"False Positive Rate (FPR): {fpr:.4f}")

plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='g', cmap='Blues',
            xticklabels=CONFIG['classes'],
            yticklabels=CONFIG['classes'])
plt.xlabel('Predicción')
plt.ylabel('Ground Truth')
plt.title('Matriz de Confusión (Conjunto de Prueba)')
plt.tight_layout()
plt.savefig(f"{PLOTS_PATH}/test_confusion_matrix.png")
plt.close()

class_report = classification_report(labels_np, pred_classes,
                                    target_names=CONFIG['classes'],
                                    output_dict=True)
class_report_df = pd.DataFrame(class_report).transpose()
print("\nInforme de Clasificación:")
print(class_report_df)
class_report_df.to_csv(f"{RESULTS_PATH}/classification_report.csv")

violence_probs = preds_np[:, 1]
fpr, tpr, thresholds = roc_curve(labels_np, violence_probs)
roc_auc = roc_auc_score(labels_np, violence_probs)

plt.figure(figsize=(10, 8))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.4f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos')
plt.ylabel('Tasa de Verdaderos Positivos')
plt.title('Curva ROC')
plt.legend(loc="lower right")
plt.grid(True, alpha=0.3)
plt.savefig(f"{PLOTS_PATH}/roc_curve.png")
plt.close()

precision_curve, recall_curve, _ = precision_recall_curve(labels_np, violence_probs)
average_precision = average_precision_score(labels_np, violence_probs)

plt.figure(figsize=(10, 8))
plt.plot(recall_curve, precision_curve, color='blue', lw=2,
         label=f'Precision-Recall curve (AP = {average_precision:.4f})')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Curva Precision-Recall')
plt.legend(loc="lower left")
plt.grid(True, alpha=0.3)
plt.savefig(f"{PLOTS_PATH}/precision_recall_curve.png")
plt.close()

In [None]:
# ===================== ANÁLISIS DE ERRORES =====================
logger.info("Analizando errores")
print("\n=== ANÁLISIS DE ERRORES ===")

incorrect_indices = np.where(pred_classes != labels_np)[0]
incorrect_paths = [all_paths[i] for i in incorrect_indices]
incorrect_labels = labels_np[incorrect_indices]
incorrect_preds = pred_classes[incorrect_indices]
incorrect_probs = preds_np[incorrect_indices]

print(f"Número de predicciones incorrectas: {len(incorrect_indices)} de {len(labels_np)} ({len(incorrect_indices)/len(labels_np)*100:.2f}%)")

fp_violence = np.where((incorrect_preds == 1) & (incorrect_labels == 0))[0]
fn_violence = np.where((incorrect_preds == 0) & (incorrect_labels == 1))[0]

print(f"Falsos Positivos (No Violencia → Violencia): {len(fp_violence)}")
print(f"Falsos Negativos (Violencia → No Violencia): {len(fn_violence)}")

if len(fp_violence) > 0:
    fp_confidence = incorrect_probs[fp_violence, 1]
    print(f"Confianza promedio en falsos positivos: {np.mean(fp_confidence):.4f}")
if len(fn_violence) > 0:
    fn_confidence = incorrect_probs[fn_violence, 0]
    print(f"Confianza promedio en falsos negativos: {np.mean(fn_confidence):.4f}")

plt.figure(figsize=(12, 6))
if len(fp_violence) > 0:
    plt.subplot(1, 2, 1)
    plt.hist(fp_confidence, bins=10, alpha=0.7)
    plt.xlabel('Confianza (Violencia)')
    plt.ylabel('Frecuencia')
    plt.title('Distribución de Confianza - Falsos Positivos')
    plt.grid(True, alpha=0.3)

if len(fn_violence) > 0:
    plt.subplot(1, 2, 2)
    plt.hist(fn_confidence, bins=10, alpha=0.7)
    plt.xlabel('Confianza (No Violencia)')
    plt.ylabel('Frecuencia')
    plt.title('Distribución de Confianza - Falsos Negativos')
    plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(f"{PLOTS_PATH}/error_confidence_distribution.png")
plt.close()

In [None]:
# ===================== ANÁLISIS DE CURVAS DE APRENDIZAJE =====================
logger.info("Analizando curvas de aprendizaje")
print("\n=== ANÁLISIS DE CURVAS DE APRENDIZAJE ===")

def get_metrics_from_logs(log_dir, metrics=['train_loss_epoch', 'val_loss', 'val_f1', 'val_accuracy']):
    metrics_data = {metric: [] for metric in metrics}
    metrics_data['epochs'] = []
    event_files = [f for f in os.listdir(log_dir) if f.startswith('events.out.tfevents')]
    if not event_files:
        print(f"No se encontraron archivos de TensorBoard en {log_dir}")
        logger.warning(f"No se encontraron archivos de TensorBoard en {log_dir}")
        return metrics_data
    event_acc = EventAccumulator(os.path.join(log_dir, event_files[0]))
    event_acc.Reload()
    for metric in metrics:
        if metric in event_acc.Tags()['scalars']:
            events = event_acc.Scalars(metric)
            metrics_data[metric] = [e.value for e in events]
            if metric == metrics[0]:
                metrics_data['epochs'] = [e.step for e in events]
    return metrics_data

transfer_metrics = get_metrics_from_logs(os.path.join(LOGS_PATH, 'timesformer'))
finetune_metrics = get_metrics_from_logs(os.path.join(LOGS_PATH, 'timesformer_finetune'))
finetune_metrics['epochs'] = [e + CONFIG['epochs_transfer'] for e in finetune_metrics['epochs']]

plt.figure(figsize=(15, 10))
plt.subplot(2, 2, 1)
plt.plot(transfer_metrics['epochs'], transfer_metrics['train_loss_epoch'], 'b-', label='Train Loss (Transfer)')
plt.plot(transfer_metrics['epochs'], transfer_metrics['val_loss'], 'g-', label='Val Loss (Transfer)')
plt.plot(finetune_metrics['epochs'], finetune_metrics['train_loss_epoch'], 'b--', label='Train Loss (Fine-tune)')
plt.plot(finetune_metrics['epochs'], finetune_metrics['val_loss'], 'g--', label='Val Loss (Fine-tune)')
plt.axvline(x=CONFIG['epochs_transfer'] + 0.5, color='r', linestyle='--', label='Inicio Fine-tuning')
plt.xlabel('Época')
plt.ylabel('Pérdida')
plt.title('Curvas de Pérdida')
plt.legend()
plt.grid(True, alpha=0.3)

plt.subplot(2, 2, 2)
plt.plot(transfer_metrics['epochs'], transfer_metrics['val_f1'], 'g-', label='Val F1 (Transfer)')
plt.plot(finetune_metrics['epochs'], finetune_metrics['val_f1'], 'g--', label='Val F1 (Fine-tune)')
plt.axvline(x=CONFIG['epochs_transfer'] + 0.5, color='r', linestyle='--', label='Inicio Fine-tuning')
plt.xlabel('Época')
plt.ylabel('F1-Score')
plt.title('Evolución del F1-Score')
plt.legend()
plt.grid(True, alpha=0.3)

plt.subplot(2, 2, 3)
plt.plot(transfer_metrics['epochs'], transfer_metrics['val_accuracy'], 'g-', label='Val Accuracy (Transfer)')
plt.plot(finetune_metrics['epochs'], finetune_metrics['val_accuracy'], 'g--', label='Val Accuracy (Fine-tune)')
plt.axvline(x=CONFIG['epochs_transfer'] + 0.5, color='r', linestyle='--', label='Inicio Fine-tuning')
plt.xlabel('Época')
plt.ylabel('Accuracy')
plt.title('Evolución de Accuracy')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(f"{PLOTS_PATH}/learning_curves.png")
plt.close()

In [None]:
# ===================== ANÁLISIS DE UMBRALES DE DECISIÓN =====================
logger.info("Analizando umbrales de decisión")
print("\n=== ANÁLISIS DE UMBRALES DE DECISIÓN ===")

thresholds = np.arange(0.1, 1.0, 0.05)
threshold_metrics = []

for threshold in thresholds:
    custom_preds = (preds_np[:, 1] >= threshold).astype(int)
    acc = accuracy_score(labels_np, custom_preds)
    prec = precision_score(labels_np, custom_preds, zero_division=0)
    rec = recall_score(labels_np, custom_preds, zero_division=0)
    f1 = f1_score(labels_np, custom_preds, zero_division=0)
    threshold_metrics.append({
        'threshold': threshold,
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1
    })

threshold_df = pd.DataFrame(threshold_metrics)
print(threshold_df)

plt.figure(figsize=(12, 6))
plt.plot(threshold_df['threshold'], threshold_df['accuracy'], 'o-', label='Accuracy')
plt.plot(threshold_df['threshold'], threshold_df['precision'], 'o-', label='Precision')
plt.plot(threshold_df['threshold'], threshold_df['recall'], 'o-', label='Recall')
plt.plot(threshold_df['threshold'], threshold_df['f1'], 'o-', label='F1-Score')
plt.xlabel('Umbral de Decisión')
plt.ylabel('Valor')
plt.title('Efecto del Umbral de Decisión en las Métricas')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig(f"{PLOTS_PATH}/threshold_analysis.png")
plt.close()

optimal_idx = threshold_df['f1'].idxmax()
optimal_threshold = threshold_df.loc[optimal_idx, 'threshold']
optimal_f1 = threshold_df.loc[optimal_idx, 'f1']

print(f"Umbral óptimo para F1-Score: {optimal_threshold:.2f} (F1 = {optimal_f1:.4f})")

optimal_preds = (preds_np[:, 1] >= optimal_threshold).astype(int)
optimal_accuracy = accuracy_score(labels_np, optimal_preds)
optimal_precision = precision_score(labels_np, optimal_preds)
optimal_recall = recall_score(labels_np, optimal_preds)
optimal_f1 = f1_score(labels_np, optimal_preds)

print("\nMétricas con umbral óptimo:")
print(f"Accuracy: {optimal_accuracy:.4f}")
print(f"Precision: {optimal_precision:.4f}")
print(f"Recall: {optimal_recall:.4f}")
print(f"F1-Score: {optimal_f1:.4f}")

In [None]:
# ===================== COMPARACIÓN DE MODELOS =====================
logger.info("Comparando modelos")
print("\n=== COMPARACIÓN DE MODELOS ===")

comparison_data = {
    'Modelo': ['Transfer Learning', 'Fine-Tuning'],
    'Accuracy': [transfer_val_results['val_accuracy'], finetune_val_results['val_accuracy']],
    'Precision': [transfer_val_results['val_precision'], finetune_val_results['val_precision']],
    'Recall': [transfer_val_results['val_recall'], finetune_val_results['val_recall']],
    'F1-Score': [transfer_val_results['val_f1'], finetune_val_results['val_f1']],
    'Specificity': [transfer_val_results['val_specificity'], finetune_val_results['val_specificity']],
    'Loss': [transfer_val_results['val_loss'], finetune_val_results['val_loss']]
}

comparison_df = pd.DataFrame(comparison_data)
print(comparison_df)

plt.figure(figsize=(12, 6))
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'Specificity']
transfer_values = [comparison_data[metric][0] for metric in metrics]
finetune_values = [comparison_data[metric][1] for metric in metrics]
x = np.arange(len(metrics))
width = 0.35
plt.bar(x - width/2, transfer_values, width, label='Transfer Learning')
plt.bar(x + width/2, finetune_values, width, label='Fine-Tuning')
plt.xlabel('Métrica')
plt.ylabel('Valor')
plt.title('Comparación de Métricas entre Transfer Learning y Fine-Tuning')
plt.xticks(x, metrics)
plt.legend()
plt.grid(True, alpha=0.3)
for i, v in enumerate(transfer_values):
    plt.text(i - width/2, v + 0.01, f'{v:.3f}', ha='center')
for i, v in enumerate(finetune_values):
    plt.text(i + width/2, v + 0.01, f'{v:.3f}', ha='center')
plt.ylim(0, 1.1)
plt.tight_layout()
plt.savefig(f"{PLOTS_PATH}/model_comparison.png")
plt.close()

In [None]:
# ===================== ANÁLISIS DE DURACIÓN DE VIDEOS VS RENDIMIENTO =====================
logger.info("Analizando duración de videos vs rendimiento")
print("\n=== ANÁLISIS DE DURACIÓN DE VIDEOS VS RENDIMIENTO ===")

def get_video_duration(video_path):
    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return None
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps if fps > 0 else None
        cap.release()
        return duration
    except Exception as e:
        print(f"Error obteniendo duración de {video_path}: {e}")
        logger.error(f"Error obteniendo duración de {video_path}: {e}")
        return None

durations = []
results = []

for i, path in enumerate(all_paths):
    duration = train_dataset.metadata_cache.get(path, {}).get('duration', get_video_duration(path))
    if duration is not None:
        durations.append(duration)
        true_label = labels_np[i]
        pred_label = pred_classes[i]
        is_correct = true_label == pred_label
        confidence = preds_np[i, pred_label]
        results.append({
            'path': path,
            'duration': duration,
            'true_label': true_label,
            'pred_label': pred_label,
            'is_correct': is_correct,
            'confidence': confidence
        })

duration_results_df = pd.DataFrame(results)
duration_bins = [0, 3, 4, 5, 6, 7]
duration_labels = ['< 3s', '3-4s', '4-5s', '5-6s', '> 6s']
duration_results_df['duration_bin'] = pd.cut(
    duration_results_df['duration'],
    bins=duration_bins,
    labels=duration_labels,
    right=False
)

accuracy_by_duration = duration_results_df.groupby('duration_bin')['is_correct'].mean()
plt.figure(figsize=(12, 6))
plt.bar(accuracy_by_duration.index, accuracy_by_duration.values)
plt.xlabel('Duración del Video')
plt.ylabel('Precisión')
plt.title('Precisión vs Duración del Video')
plt.ylim(0, 1)
plt.grid(True, alpha=0.3)
for i, v in enumerate(accuracy_by_duration.values):
    plt.text(i, v + 0.02, f'{v:.3f}', ha='center')
plt.tight_layout()
plt.savefig(f"{PLOTS_PATH}/accuracy_vs_duration.png")
plt.close()

plt.figure(figsize=(12, 6))
sns.boxplot(x='duration_bin', y='confidence', hue='is_correct', data=duration_results_df)
plt.xlabel('Duración del Video')
plt.ylabel('Confianza de Predicción')
plt.title('Confianza vs Duración del Video (por Resultado)')
plt.grid(True, alpha=0.3)
plt.legend(title='Predicción Correcta')
plt.tight_layout()
plt.savefig(f"{PLOTS_PATH}/confidence_vs_duration.png")
plt.close()

In [None]:
# ===================== EXPORTACIÓN DEL MODELO FINAL =====================
logger.info("Exportando modelo final")
print("\n=== EXPORTACIÓN DEL MODELO FINAL ===")

torch.save(best_model.state_dict(), f"{OUTPUT_PATH}/timesformer_violence_detection_state_dict.pt")
print(f"Modelo guardado en formato PyTorch: {OUTPUT_PATH}/timesformer_violence_detection_state_dict.pt")
logger.info(f"Modelo guardado en formato PyTorch: {OUTPUT_PATH}/timesformer_violence_detection_state_dict.pt")

try:
    dummy_input = torch.randn(1, 3, CONFIG['num_frames'], CONFIG['image_size'], CONFIG['image_size']).to(device)
    torch.onnx.export(
        best_model.model,
        dummy_input,
        f"{OUTPUT_PATH}/timesformer_violence_detection.onnx",
        export_params=True,
        opset_version=12,
        do_constant_folding=True,
        input_names=['pixel_values'],
        output_names=['logits'],
        dynamic_axes={'pixel_values': {0: 'batch_size'}, 'logits': {0: 'batch_size'}}
    )
    onnx_model = onnx.load(f"{OUTPUT_PATH}/timesformer_violence_detection.onnx")
    onnx.checker.check_model(onnx_model)
    print(f"Modelo exportado y verificado en formato ONNX: {OUTPUT_PATH}/timesformer_violence_detection.onnx")
    logger.info(f"Modelo exportado y verificado en formato ONNX: {OUTPUT_PATH}/timesformer_violence_detection.onnx")
except Exception as e:
    print(f"Error al exportar modelo en formato ONNX: {e}")
    logger.error(f"Error al exportar modelo en formato ONNX: {e}")

from torch.quantization import quantize_dynamic
quantized_model = quantize_dynamic(
    best_model.model.cpu(),
    {torch.nn.Linear},
    dtype=torch.qint8
)
torch.save(quantized_model.state_dict(), f"{OUTPUT_PATH}/timesformer_quantized.pt")
print(f"Modelo cuantizado guardado: {OUTPUT_PATH}/timesformer_quantized.pt")
logger.info(f"Modelo cuantizado guardado: {OUTPUT_PATH}/timesformer_quantized.pt")

def measure_inference_time(model, dummy_input, iterations=100):
    model.eval()
    start_time = time.time()
    with torch.no_grad():
        for _ in range(iterations):
            _ = model(dummy_input)
    avg_time = (time.time() - start_time) / iterations
    fps = 1 / avg_time
    print(f"Tiempo promedio de inferencia: {avg_time:.4f}s, FPS: {fps:.2f}")
    logger.info(f"Tiempo promedio de inferencia: {avg_time:.4f}s, FPS: {fps:.2f}")
dummy_input = torch.randn(1, 3, CONFIG['num_frames'], CONFIG['image_size'], CONFIG['image_size']).to(device)
measure_inference_time(best_model, dummy_input)

model_config = {
    'model_type': CONFIG['model_type'],
    'num_frames': CONFIG['num_frames'],
    'image_size': CONFIG['image_size'],
    'num_classes': len(CONFIG['classes']),
    'class_mapping': {i: name for i, name in enumerate(CONFIG['classes'])},
    'optimal_threshold': float(optimal_threshold),
    'metrics': {
        'accuracy': float(optimal_accuracy),
        'precision': float(optimal_precision),
        'recall': float(optimal_recall),
        'f1': float(optimal_f1),
        'specificity': float(specificity),
        'tpr': float(tpr),
        'fpr': float(fpr)
    },
    'training_details': {
        'transfer_learning_epochs': CONFIG['epochs_transfer'],
        'fine_tuning_epochs': CONFIG['epochs_finetune'],
        'batch_size': CONFIG['batch_size'],
        'batch_size_finetune': CONFIG['batch_size_finetune'],
        'learning_rate_transfer': CONFIG['learning_rate_transfer'],
        'learning_rate_finetune': CONFIG['learning_rate_finetune'],
        'trained_on': str(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    }
}

with open(f"{OUTPUT_PATH}/model_config.json", 'w') as f:
    json.dump(model_config, f, indent=4)
print(f"Configuración y metadatos del modelo guardados en: {OUTPUT_PATH}/model_config.json")
logger.info(f"Configuración y metadatos guardados en: {OUTPUT_PATH}/model_config.json")

In [None]:
# ===================== INFORME FINAL =====================
logger.info("Generando informe final")
print("\n=== GENERANDO INFORME FINAL ===")

# Resumen de videos problemáticos
video_errors_summary = ""
if os.path.exists(f"{OUTPUT_PATH}/video_errors.log"):
    with open(f"{OUTPUT_PATH}/video_errors.log", 'r') as f:
        errors = f.readlines()
    video_errors_summary = f"- **Videos problemáticos**: {len(errors)} videos con errores de carga (ver `{OUTPUT_PATH}/video_errors.log`).\n"

report = f"""# Informe de Entrenamiento de TimeSformer para Detección de Violencia

## Resumen
- **Modelo**: {CONFIG['model_type']}
- **Dataset**: Detección de violencia para prevención de violencia escolar
- **Fecha de entrenamiento**: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

## Configuración de Entrenamiento
- **Transfer Learning**:
  - Batch Size: {CONFIG['batch_size']}
  - Learning Rate: {CONFIG['learning_rate_transfer']}
  - Optimizador: AdamW
  - Épocas: {CONFIG['epochs_transfer']}
  - Backbone congelado: Sí
- **Fine-Tuning**:
  - Batch Size: {CONFIG['batch_size_finetune']}
  - Learning Rate: {CONFIG['learning_rate_finetune']}
  - Optimizador: AdamW
  - Épocas: {CONFIG['epochs_finetune']}
  - Backbone congelado: No

## Métricas Finales (Umbral Óptimo: {optimal_threshold:.2f})
- **Accuracy**: {optimal_accuracy:.4f}
- **Precision**: {optimal_precision:.4f}
- **Recall**: {optimal_recall:.4f}
- **F1-Score**: {optimal_f1:.4f}
- **Especificidad**: {specificity:.4f}
- **True Positive Rate (TPR)**: {tpr:.4f}
- **False Positive Rate (FPR)**: {fpr:.4f}
- **ROC-AUC**: {roc_auc:.4f}
- **Average Precision (PR-AUC)**: {average_precision:.4f}
- **BLEU**: No aplicable, ya que el modelo realiza clasificación de videos, no generación de texto.

## Análisis del Dataset
- **Total de videos**: {total_videos}
- **Train**: {total_train} videos ({train_pct:.1f}%)
- **Validación**: {total_val} videos ({val_pct:.1f}%)
- **Test**: {total_test} videos ({test_pct:.1f}%)
- **Balance de Clases**:
  - Train: {train_balance:.1f}% violencia
  - Validación: {val_balance:.1f}% violencia
  - Test: {test_balance:.1f}% violencia

## Análisis de Errores
- **Falsos Positivos (No Violencia → Violencia)**: {len(fp_violence)} casos
- **Falsos Negativos (Violencia → No Violencia)**: {len(fn_violence)} casos
- **Total de errores**: {len(incorrect_indices)} de {len(labels_np)} ({len(incorrect_indices)/len(labels_np)*100:.2f}%)

## Caché de Metadatos
- **Archivo**: `{OUTPUT_PATH}/video_metadata.json`
- **Uso**: Almacena duración, FPS, y validez de videos para acelerar cargas futuras.
{video_errors_summary}
## Recomendaciones
1. **Verificar videos problemáticos**: Revisar `{OUTPUT_PATH}/video_errors.log` para identificar videos con errores de carga.
2. **Ajustar umbral de decisión**: Usar un umbral de {optimal_threshold:.2f} para optimizar F1-Score. Ajustar según necesidades de precisión o recall.
3. **Optimización para tiempo real**: Usar el modelo ONNX o cuantizado para inferencia rápida. Verificar FPS en producción.
4. **Integración con YOLOv11/DeepSORT**: Asegurar compatibilidad de formatos y latencia para el sistema completo.

## Archivos del Modelo
- **Modelo PyTorch**: `{OUTPUT_PATH}/timesformer_violence_detection_state_dict.pt`
- **Modelo ONNX**: `{OUTPUT_PATH}/timesformer_violence_detection.onnx`
- **Modelo Cuantizado**: `{OUTPUT_PATH}/timesformer_quantized.pt`
- **Configuración**: `{OUTPUT_PATH}/model_config.json`
- **Resultados y gráficos**: `{RESULTS_PATH}/`
"""

report_path = f"{OUTPUT_PATH}/training_report.md"
with open(report_path, 'w') as f:
    f.write(report)
print(f"Informe final generado en: {report_path}")
logger.info(f"Informe final generado en: {report_path}")


In [None]:
# ===================== RECURSOS Y LIMPIEZA =====================
logger.info("Liberando recursos")
print("\n=== LIMPIEZA Y LIBERACIÓN DE RECURSOS ===")
torch.cuda.empty_cache()
gc.collect()
if torch.cuda.is_available():
    print(f"Memoria GPU usada al finalizar: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    print(f"Memoria GPU reservada al finalizar: {torch.cuda.memory_reserved() / 1e9:.2f} GB")
    logger.info(f"Memoria GPU usada: {torch.cuda.memory_allocated() / 1e9:.2f} GB, reservada: {torch.cuda.memory_reserved() / 1e9:.2f} GB")

print("\n=== ENTRENAMIENTO COMPLETADO ===")
print(f"Todos los resultados y modelos guardados en: {OUTPUT_PATH}")
logger.info("Entrenamiento completado")