# 04 - Modelo KoVAE: Predicciones Probabilísticas

**Objetivo**: Implementar y evaluar KoVAE (Koopman Variational Autoencoder) para predicciones probabilísticas de precipitación.

**Diferencias con AE+DMD**:
- **AE+DMD**: Determinístico, predice un único valor
- **KoVAE**: Probabilístico, genera distribución completa con intervalos de confianza

**Ventajas de KoVAE**:
1. Cuantificación de incertidumbre (intervalos de confianza)
2. Predicciones multimodales (escenarios alternativos)
3. Útil para análisis de riesgo y toma de decisiones

**Autor**: Capstone Project - Pronóstico Híbrido Precipitaciones Chile  
**Fecha**: 19 Noviembre 2025

In [None]:
# ====================================================================================
# 1. IMPORTAR LIBRERÍAS
# ====================================================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import pickle
import sys
sys.path.append('..')

from src.models.kovae import KoVAE
from src.utils.metrics import calculate_mae, calculate_rmse
import warnings
warnings.filterwarnings('ignore')

# Visual configuration: seaborn grid style and a larger default figure size
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 8)

# Directories (relative paths, resolved against the notebook's working directory)
DATA_DIR = Path('../data')
MODEL_DIR = Path('../data/models')
FIG_DIR = Path('../reports/figures')

# Create every output directory up front. Later cells write figures into
# FIG_DIR with plt.savefig(), which raises if the target directory is missing,
# so FIG_DIR needs the same treatment as MODEL_DIR.
for output_dir in (MODEL_DIR, FIG_DIR):
    output_dir.mkdir(parents=True, exist_ok=True)

print("‚úÖ Librer√≠as importadas correctamente")

In [None]:
# ====================================================================================
# 2. LOAD TRAINING DATA
# ====================================================================================

# Load the preprocessed precipitation array.
# NOTE(review): precip_data is only printed in this cell; the splits below are
# built from the forecast results pickle instead.
precip_path = DATA_DIR / 'raw' / 'precipitation_data.npy'
precip_data = np.load(precip_path)
print(f"üì¶ Datos cargados: {precip_data.shape}")

# Author's note: that file has shape (1826, 30) according to a previous analysis
# and would need converting to (samples, lat, lon, 1). For now the dataset stored
# in forecast_results, which already has that structure, is used instead.

# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# artefacts produced by this project.
results_path = DATA_DIR / 'processed' / 'forecast_results_2020.pkl'
with open(results_path, 'rb') as results_file:
    forecast_results = pickle.load(results_file)

# Test data (author's note on the expected shape: (55, 157, 41, 1))
y_test = forecast_results['y_test_real']

print(f"\nüìä Datos de test ERA5:")
print(f"   Shape: {y_test.shape}")
print(f"   Rango: [{y_test.min():.2f}, {y_test.max():.2f}] mm/d√≠a")

# Demo-only split: KoVAE should be trained on the full historical ERA5 2019
# data, but for now train, validation and test are all carved out of the test
# set itself.
# NOTE(review): the original comment mentioned "the first 365 days" as training
# data; the code actually uses the first 40 samples.
X_train, X_val, X_test = (
    y_test[:40],    # first 40 samples for training
    y_test[40:50],  # next 10 samples for validation
    y_test[50:],    # remaining samples for test (5 if y_test holds 55)
)

print(f"\n‚úÖ Divisi√≥n de datos:")
for split_name, split_data in (('Train', X_train), ('Val', X_val), ('Test', X_test)):
    print(f"   {split_name}: {split_data.shape}")

In [None]:
# ====================================================================================
# 3. BUILD AND COMPILE THE KoVAE MODEL
# ====================================================================================

# Model hyperparameters
SPATIAL_DIMS = (157, 41)  # spatial grid (author's note: Chile)
LATENT_DIM = 64           # same as AE+DMD, for a fair comparison
BETA = 1.0                # weight of the KL divergence term
GAMMA = 0.1               # weight of the Koopman term

# Gather the constructor arguments in one place, then instantiate the model
kovae_params = {
    'spatial_dims': SPATIAL_DIMS,
    'latent_dim': LATENT_DIM,
    'beta': BETA,
    'gamma': GAMMA,
}
kovae = KoVAE(**kovae_params)

# Build the architecture, then compile it with the chosen learning rate
kovae.build()
kovae.compile_model(learning_rate=1e-3)

print("\n‚úÖ Modelo KoVAE listo para entrenamiento")

In [None]:
# ====================================================================================
# 4. TRAIN THE MODEL
# ====================================================================================

# Training settings, defined once so the values logged below can never drift
# from the values actually passed to kovae.train().
EPOCHS = 100      # maximum number of epochs
BATCH_SIZE = 8
PATIENCE = 15     # early-stopping patience

print("üèãÔ∏è Entrenando KoVAE...")
print(f"   Epochs m√°ximo: {EPOCHS}")
print(f"   Batch size: {BATCH_SIZE}")
print(f"   Early stopping: patience={PATIENCE}")

# Later cells read history.history['loss'] / ['val_loss'] from the returned object
history = kovae.train(
    X_train=X_train,
    X_val=X_val,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    patience=PATIENCE
)

print("\n‚úÖ Entrenamiento completado")

In [None]:
# ====================================================================================
# 5. PLOT THE TRAINING CURVES
# ====================================================================================

loss_curves = history.history
axis_label_font = {'fontsize': 12, 'fontweight': 'bold'}
curves_path = FIG_DIR / 'kovae_training_curves.png'

fig, ax = plt.subplots(figsize=(10, 6))

# The training loss is always plotted; the validation loss only if it was recorded
ax.plot(loss_curves['loss'], label='Train Loss', linewidth=2)
if 'val_loss' in loss_curves:
    ax.plot(loss_curves['val_loss'], label='Val Loss', linewidth=2)

ax.set_xlabel('√âpoca', **axis_label_font)
ax.set_ylabel('P√©rdida KoVAE', **axis_label_font)
ax.set_title('Curvas de Entrenamiento KoVAE', fontsize=14, fontweight='bold')
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)

# Save the figure to disk before showing it
plt.tight_layout()
plt.savefig(curves_path, dpi=150, bbox_inches='tight')
print(f"üíæ Guardado: {curves_path}")
plt.show()

In [None]:
# ====================================================================================
# 6. EVALUATE RECONSTRUCTION
# ====================================================================================

print("üìä Evaluando reconstrucci√≥n...")

# Reconstruct the test data with the VAE
X_recon = kovae.vae.predict(X_test, verbose=0)

# Reconstruction metrics over every sample and grid cell
recon_residual = X_test - X_recon
mae_recon = np.mean(np.abs(recon_residual))
rmse_recon = np.sqrt(np.mean(recon_residual**2))

print(f"\n‚úÖ M√©tricas de Reconstrucci√≥n:")
print(f"   MAE: {mae_recon:.4f} mm/d√≠a")
print(f"   RMSE: {rmse_recon:.4f} mm/d√≠a")

# Visualise one example: truth, reconstruction and absolute error side by side
idx = 0
truth_map = X_test[idx, :, :, 0]
recon_map = X_recon[idx, :, :, 0]
error = np.abs(truth_map - recon_map)

# One entry per panel: (field, title, colormap, upper colour limit)
panels = (
    (truth_map, 'Ground Truth', 'Blues', 20),
    (recon_map, 'Reconstrucci√≥n KoVAE', 'Blues', 20),
    (error, 'Error Absoluto', 'Reds', 5),
)

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for panel_ax, (field, panel_title, cmap_name, color_max) in zip(axes, panels):
    image = panel_ax.imshow(field, cmap=cmap_name, vmin=0, vmax=color_max)
    panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
    panel_ax.axis('off')
    plt.colorbar(image, ax=panel_ax, fraction=0.046)

recon_fig_path = FIG_DIR / 'kovae_reconstruction.png'
plt.tight_layout()
plt.savefig(recon_fig_path, dpi=150, bbox_inches='tight')
print(f"üíæ Guardado: {recon_fig_path}")
plt.show()

In [None]:
# ====================================================================================
# 7. PROBABILISTIC PREDICTIONS (multistep)
# ====================================================================================

print("üîÆ Generando predicciones probabil√≠sticas...")

# Forecast 7 steps ahead, with uncertainty, for 3 example samples
forecast_inputs = X_test[:3]
predictions, uncertainties = kovae.predict_multistep(forecast_inputs, n_steps=7)

# Value ranges of both outputs, for the summary below
pred_min, pred_max = predictions.min(), predictions.max()
uncert_min, uncert_max = uncertainties.min(), uncertainties.max()

print(f"\n‚úÖ Predicciones generadas:")
print(f"   Predictions shape: {predictions.shape}")  # author's note: (3, 7, 157, 41, 1)
print(f"   Uncertainties shape: {uncertainties.shape}")
print(f"   Rango predicciones: [{pred_min:.2f}, {pred_max:.2f}]")
print(f"   Rango incertidumbres: [{uncert_min:.4f}, {uncert_max:.4f}]")

In [None]:
# ====================================================================================
# 8. PLOT PREDICTIONS WITH CONFIDENCE INTERVALS
# ====================================================================================

# Pick one grid cell (author's note: central Chile)
lat_idx, lon_idx = 80, 20

# Forecast series and its uncertainty at that cell, for the first sample
sample_idx = 0
preds_point = predictions[sample_idx, :, lat_idx, lon_idx, 0]
uncert_point = uncertainties[sample_idx, :, lat_idx, lon_idx, 0]

# 95% confidence interval (±1.96σ).
# NOTE(review): this assumes `uncertainties` holds standard deviations - confirm
# against KoVAE.predict_multistep. The lower bound can also fall below zero,
# which is not physical for precipitation; consider clipping at 0.
upper_bound = preds_point + 1.96 * uncert_point
lower_bound = preds_point - 1.96 * uncert_point

fig, ax = plt.subplots(figsize=(10, 6))

# Horizon axis 1..n_steps, derived from the forecast length so this cell keeps
# working if n_steps changes in the prediction cell.
x = np.arange(1, len(preds_point) + 1)
ax.plot(x, preds_point, 'o-', linewidth=2, markersize=8,
        color='#2E86AB', label='Predicci√≥n KoVAE')
ax.fill_between(x, lower_bound, upper_bound, alpha=0.3,
                color='#2E86AB', label='IC 95%')

ax.set_xlabel('Horizonte (d√≠as)', fontsize=12, fontweight='bold')
ax.set_ylabel('Precipitaci√≥n (mm/d√≠a)', fontsize=12, fontweight='bold')
# A real "\n" line break: the previous "\\n" rendered a literal backslash-n in the title
ax.set_title(f'Predicciones Probabil√≠sticas KoVAE\nPunto: lat_idx={lat_idx}, lon_idx={lon_idx} (Centro Chile)',
             fontsize=13, fontweight='bold')
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)
ax.set_xticks(x)

plt.tight_layout()
plt.savefig(FIG_DIR / 'kovae_probabilistic_forecast.png', dpi=150, bbox_inches='tight')
print(f"üíæ Guardado: {FIG_DIR / 'kovae_probabilistic_forecast.png'}")
plt.show()

In [None]:
# ====================================================================================
# 9. COMPARISON: KoVAE vs AE+DMD
# ====================================================================================

print("‚öñÔ∏è Comparando KoVAE vs AE+DMD...")

# AE+DMD forecasts stored in the same results pickle as the test data
ae_dmd_preds = forecast_results['forecast_results']

# Horizon h=1 for both models. The AE+DMD entry under key 1 is read from sample
# 50 onwards (the same offset as X_test = y_test[50:]); for KoVAE, step 0 of the
# multistep output is taken as h=1.
h1_ae_dmd = ae_dmd_preds[1]['predictions'][50:, :, :, :]
h1_kovae = predictions[:, 0, :, :, :]

# KoVAE forecasts exist for 3 samples only, so both models are scored on those 3.
# NOTE(review): the KoVAE forecasts were generated from X_test[:3], i.e. from
# y_test[50:53], and are scored here against those same frames. If step 0 of
# predict_multistep is a t+1 forecast, the target should probably be shifted by
# one sample - confirm against KoVAE.predict_multistep.
y_true_comp = y_test[50:53]
abs_err_ae_dmd = np.abs(y_true_comp - h1_ae_dmd[:3])
abs_err_kovae = np.abs(y_true_comp - h1_kovae[:3])
mae_ae_dmd = np.mean(abs_err_ae_dmd)
mae_kovae = np.mean(abs_err_kovae)

print(f"\nüìä Comparaci√≥n MAE (h=1):")
print(f"   AE+DMD: {mae_ae_dmd:.4f} mm/d√≠a")
print(f"   KoVAE: {mae_kovae:.4f} mm/d√≠a")
# Relative MAE difference of KoVAE with respect to AE+DMD, in percent
mae_diff_pct = (mae_kovae - mae_ae_dmd) / mae_ae_dmd * 100
print(f"   Diferencia: {mae_diff_pct:+.2f}%")

# Side-by-side maps for the first compared sample: (field, panel title)
comparison_panels = (
    (y_true_comp[0, :, :, 0], 'Ground Truth'),
    (h1_ae_dmd[0, :, :, 0], f'AE+DMD (MAE={mae_ae_dmd:.3f})'),
    (h1_kovae[0, :, :, 0], f'KoVAE (MAE={mae_kovae:.3f})'),
)

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for panel_ax, (field, panel_title) in zip(axes, comparison_panels):
    image = panel_ax.imshow(field, cmap='Blues', vmin=0, vmax=20)
    panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
    panel_ax.axis('off')
    plt.colorbar(image, ax=panel_ax, fraction=0.046)

comparison_fig_path = FIG_DIR / 'kovae_vs_aedmd_comparison.png'
plt.tight_layout()
plt.savefig(comparison_fig_path, dpi=150, bbox_inches='tight')
print(f"üíæ Guardado: {comparison_fig_path}")
plt.show()

In [None]:
# ====================================================================================
# 10. SAVE THE MODEL
# ====================================================================================

model_save_path = MODEL_DIR / 'kovae_trained'
kovae.save(model_save_path)

# Files the author expects kovae.save() to write. The list is informational
# only; nothing here checks it against what is actually on disk.
expected_artifacts = (
    "kovae_full.h5 (modelo completo)",
    "encoder.h5 (encoder)",
    "decoder.h5 (decoder)",
    "koopman_matrix.npy (matriz K)",
    "config.pkl (configuraci√≥n)",
)

print(f"\n‚úÖ Modelo KoVAE guardado en: {model_save_path}")
print(f"   Archivos generados:")
for artifact in expected_artifacts:
    print(f"   - {artifact}")

In [None]:
# ====================================================================================
# 11. SUMMARY AND CONCLUSIONS
# ====================================================================================

# Every section header starts with a real newline ("\n"). The previous "\\n"
# printed a literal backslash-n instead of a blank line.
print("\n" + "="*80)
print("RESUMEN - Modelo KoVAE")
print("="*80)

# Training: number of epochs actually run and the final losses
train_losses = history.history['loss']
print(f"\n‚úÖ ENTRENAMIENTO:")
print(f"   Epochs ejecutados: {len(train_losses)}")
print(f"   Loss final train: {train_losses[-1]:.4f}")
if 'val_loss' in history.history:
    print(f"   Loss final val: {history.history['val_loss'][-1]:.4f}")

# Reconstruction metrics computed in the reconstruction cell
print(f"\n‚úÖ RECONSTRUCCI√ìN:")
print(f"   MAE: {mae_recon:.4f} mm/d√≠a")
print(f"   RMSE: {rmse_recon:.4f} mm/d√≠a")

# One-step forecast comparison computed in the comparison cell
print(f"\n‚úÖ PREDICCI√ìN (h=1):")
print(f"   MAE KoVAE: {mae_kovae:.4f} mm/d√≠a")
print(f"   MAE AE+DMD: {mae_ae_dmd:.4f} mm/d√≠a")
print(f"   Diferencia: {((mae_kovae - mae_ae_dmd) / mae_ae_dmd * 100):+.2f}%")

print(f"\n‚úÖ VENTAJAS KoVAE:")
print(f"   - Cuantificaci√≥n de incertidumbre (intervalos de confianza)")
print(f"   - Predicciones probabil√≠sticas (distribuci√≥n completa)")
print(f"   - √ötil para an√°lisis de riesgo y toma de decisiones")

print(f"\n‚ö†Ô∏è LIMITACIONES ACTUALES:")
print(f"   - Entrenado solo con {X_train.shape[0]} muestras (demo)")
print(f"   - Para producci√≥n: entrenar con dataset completo 2019")
print(f"   - Optimizar hiperpar√°metros (beta, gamma, latent_dim)")

print(f"\nüí° PR√ìXIMOS PASOS:")
print(f"   1. Entrenar con dataset completo ERA5 2019")
print(f"   2. Ajustar beta (KL) y gamma (Koopman)")
print(f"   3. Generar muestras m√∫ltiples para cuantificaci√≥n robusta")
print(f"   4. Comparar intervalos de confianza con observaciones")