# 🎓 VideoMAE - Sign Language Recognition (WLASL)
## Clean Training & Evaluation Pipeline

**Autor:** Rafael Ovalle - Tesis UNAB  
**Dataset:** WLASL100/WLASL300  
**Modelo:** VideoMAE

---

### 📋 Configuraciones Disponibles:

| Config | Dataset | Train | Val | Test | Batch | LR | Uso |
|--------|---------|-------|-----|------|-------|----|-----|
| **V1-100** | 100 clases | 807 | 194 | 117 | 16 | 1e-4 | Baseline |
| **V2-100** | 100 clases | 1,001 | 117 | 117 | 6 | 1e-5 | Maximizar datos |
| **V1-300** | 300 clases | 1,959 | 557 | 271 | 16 | 1e-4 | Baseline |
| **V2-300** | 300 clases | 2,516 | 271 | 271 | 6 | 1e-5 | Maximizar datos |

---

# 1️⃣ Setup Inicial

In [None]:
# Verificar GPU
!nvidia-smi

import torch
print(f"\n{'='*60}")
print(f"PyTorch: {torch.__version__}")
print(f"CUDA disponible: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memoria: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
print(f"{'='*60}")

In [None]:
# Mount Google Drive inside the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

# Project root folder on Drive; it is passed as drive_root when the
# experiment configuration is created.
DRIVE_ROOT = "/content/drive/MyDrive/TESIS_WLASL"
print(f"✅ Drive montado: {DRIVE_ROOT}")

In [None]:
# Clonar repositorio (si es necesario)
import os
if not os.path.exists('AtiendeSenas-MVP'):
    !git clone https://github.com/Ov4llezz/AtiendeSenas-MVP.git
    %cd AtiendeSenas-MVP
else:
    %cd AtiendeSenas-MVP
    !git pull

print("‚úÖ Repositorio listo")

In [None]:
# Install dependencies.
# NOTE(review): setup_environment() lives in colab_utils/config.py, outside
# this notebook — what exactly it installs must be confirmed there.
from colab_utils.config import setup_environment
setup_environment()

# 2️⃣ Configuración del Experimento

**🎯 Configura tu experimento aquí:**

In [None]:
from colab_utils.config import create_config, print_config, save_config

# ------------------------------------------------------------
#   EXPERIMENT SETTINGS — EDIT THESE TWO VALUES
# ------------------------------------------------------------
DATASET_TYPE = "wlasl100"  # "wlasl100" or "wlasl300"
VERSION = "v1"             # "v1" (baseline) or "v2" (experimental)
# ------------------------------------------------------------

# Build the configuration from the two choices above plus the Drive root.
config = create_config(dataset_type=DATASET_TYPE, version=VERSION, drive_root=DRIVE_ROOT)

# Show the configuration, then hand it to save_config together with the
# results directory it names.
print_config(config)
save_config(config, config['results_dir'])

# 3️⃣ Cargar Datasets

In [None]:
from torch.utils.data import DataLoader
from colab_utils.dataset import WLASLVideoDataset

print("[INFO] Cargando datasets...\n")

# One dataset per split, all built from the same data root and class count.
# The generator is consumed in order, so construction order stays
# train -> val -> test.
train_dataset, val_dataset, test_dataset = (
    WLASLVideoDataset(
        split=split,
        base_path=config['data_root'],
        dataset_size=config['num_classes'],
    )
    for split in ("train", "val", "test")
)

# Settings shared by all three loaders.
# pin_memory is enabled only when config['device'] is exactly "cuda".
# NOTE(review): a value such as "cuda:0" would not match — confirm what
# create_config() stores under 'device'.
loader_kwargs = dict(
    batch_size=config['batch_size'],
    num_workers=config['num_workers'],
    pin_memory=config['device'] == "cuda",
)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

# Summary table: sample count and number of batches per split.
separator = '=' * 60
print(separator)
print("DATASETS CARGADOS")
print(separator)
for _label, _dataset, _loader in (
    ("Train:      ", train_dataset, train_loader),
    ("Validation: ", val_dataset, val_loader),
    ("Test:       ", test_dataset, test_loader),
):
    print(f"{_label}{len(_dataset):,} videos ({len(_loader)} batches)")
print(f"{separator}\n")

# 4️⃣ Entrenamiento

In [None]:
from colab_utils.training import train_model

# Train the model. The call returns four values that later cells reuse:
# the model, the training history, this run's checkpoint directory and the
# log directory.
model, training_history, run_checkpoint_dir, log_dir = train_model(
    config=config,
    train_loader=train_loader,
    val_loader=val_loader,
    train_dataset=train_dataset,
)

print("\n✅ Entrenamiento completado!")
print(f"Checkpoints: {run_checkpoint_dir}")
print(f"Logs: {log_dir}")

# 5️⃣ Visualizar Curvas de Entrenamiento

In [None]:
from datetime import datetime

import pandas as pd
from colab_utils.visualization import plot_training_curves

# Tabulate the history returned by train_model().
history_df = pd.DataFrame(training_history)

# Timestamp used to name the output files; the visualization and download
# cells reuse `timestamp`.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
curves_path = f"{config['results_dir']}/training_curves_{timestamp}.png"

# Plot the curves, passing the target image path as save_path.
plot_training_curves(history_df, save_path=curves_path)

# 6️⃣ Evaluación en Test Set

In [None]:
# Imported here so the cell does not depend on the GPU-check cell having run.
import torch

from colab_utils.evaluation import evaluate_detailed, print_results, print_top_classes

# Load best_model.pt from this run's checkpoint directory into the model.
print("[INFO] Cargando mejor modelo...\n")
best_model_path = f"{run_checkpoint_dir}/best_model.pt"
# NOTE(review): PyTorch >= 2.6 defaults torch.load to weights_only=True. If the
# checkpoint stores objects beyond tensors and plain Python values, this call
# may need an explicit setting — confirm against what train_model() saves.
checkpoint = torch.load(best_model_path, map_location=config['device'])
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # switch to inference mode before evaluating

print(f"✅ Modelo cargado (Epoch {checkpoint['epoch']}, Val Acc: {checkpoint['val_acc']:.2f}%)\n")

# Banner for the test-set evaluation.
print(f"{'='*70}")
print(f"{'EVALUACIÓN EN TEST SET':^70}")
print(f"{'='*70}\n")

# Evaluate on the test loader; the result object feeds the reporting,
# visualization and saving cells.
test_results = evaluate_detailed(
    model=model,
    dataloader=test_loader,
    device=config['device'],
    num_classes=config['num_classes'],
)

# Print the results, then the top-10 class listing.
print_results(test_results)
print_top_classes(test_results, top_n=10)

# 7️⃣ Visualizaciones Completas

In [None]:
from colab_utils.visualization import visualize_all_results

# Generate all visualizations.
# Inputs come from earlier cells: test_results (test-set evaluation cell),
# history_df and timestamp (training-curves cell).
viz_paths = visualize_all_results(
    results=test_results,
    history_df=history_df,
    output_dir=config['results_dir'],
    timestamp=timestamp
)

# 8️⃣ Guardar Resultados

In [None]:
from colab_utils.evaluation import save_results

# Summary of the loaded checkpoint, coerced to plain int / float values.
checkpoint_info = dict(
    best_epoch=int(checkpoint['epoch']),
    best_val_loss=float(checkpoint['val_loss']),
    best_val_acc=float(checkpoint['val_acc']),
    total_epochs_trained=len(training_history),
)

# Save the results; the call returns three output paths plus a fourth value
# bound to `ts`.
json_path, txt_path, pred_path, ts = save_results(
    results=test_results,
    config=config,
    checkpoint_info=checkpoint_info,
    output_dir=config['results_dir'],
)

# Listing of where each output of this run is located.
_rule = '=' * 80
print(f"\n{_rule}")
print(f"{'ARCHIVOS GENERADOS':^80}")
print(_rule)
for _label, _location in (
    ("Checkpoints:      ", run_checkpoint_dir),
    ("Logs TensorBoard: ", log_dir),
    ("JSON completo:    ", json_path),
    ("Reporte TXT:      ", txt_path),
    ("Predicciones:     ", pred_path),
    ("Visualizaciones:  ", config['results_dir']),
):
    print(f"{_label}{_location}")
print(f"{_rule}\n")

# 9️⃣ TensorBoard (Opcional)

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

# Launch TensorBoard on the log directory returned by train_model()
%tensorboard --logdir {log_dir}

# 🔟 Descargar Resultados

In [None]:
# Comprimir todos los resultados
!zip -r results_{timestamp}.zip \
    {config['results_dir']} \
    {run_checkpoint_dir} \
    {log_dir}

# Descargar
from google.colab import files
files.download(f'results_{timestamp}.zip')

print("\n‚úÖ ¬°Resultados descargados!")

---

## ✅ ¡Experimento Completado!

### 📊 Resumen:
- **Dataset:** `config['dataset_type'].upper()`
- **Versión:** `config['version'].upper()`
- **Test Accuracy:** Ver resultados arriba
- **Todos los archivos guardados en Google Drive**

---