# 🌧️ Timer-XL Peru Rainfall Prediction

## 1. Setup Environment

In [None]:
# Print the status of the NVIDIA GPU attached to this runtime.
!nvidia-smi

In [None]:
!git clone https://github.com/ChristianPE1/test-openltm-code.git
%cd test-openltm-code

In [None]:
# Mount Google Drive at /content/drive (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')

# NOTE(review): printed as soon as drive.mount() returns; this line does not
# itself verify that the mount point is usable.
print("‚úÖ Google Drive mounted")

## 2. Verificar Datos ERA5

In [None]:
# List the raw ERA5 input directory with human-readable file sizes.
!ls -lh datasets/raw_era5/


## 3. Preprocess Data

In [None]:

# Run the classification preprocessing script over the raw ERA5 files for
# years 2014-2024, writing its output under datasets/processed.
# NOTE(review): --target_horizon is presumably in hours and --threshold is
# presumably the rain/no-rain cutoff; both are defined by the script, which is
# not visible here — confirm there.
!python preprocessing/preprocess_era5_peru.py \
    --input_dir datasets/raw_era5 \
    --output_dir datasets/processed \
    --years 2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024 \
    --target_horizon 24 \
    --threshold 0.0001

# NOTE(review): printed unconditionally; a failing `!` command does not stop the cell.
print("\n‚úÖ Preproces complete!")

In [None]:
# Sanity-check the preprocessing output: preview the CSV, then dump the
# statistics JSON written next to it.
import pandas as pd
import json

df = pd.read_csv('datasets/processed/peru_rainfall.csv')
print(f"Dataset shape: {df.shape}", "\nFirst few rows:", df.head(), sep="\n")

# Preprocessing statistics, pretty-printed as JSON.
with open('datasets/processed/preprocessing_stats.json') as f:
    stats = json.loads(f.read())
print("\nStatistics:", json.dumps(stats, indent=2), sep="\n")

## 4. Train Timer-XL

In [None]:
# Copy the pre-trained Timer-XL checkpoint from Google Drive into the repo.
# The paths are defined once and reused. A missing Drive file raises here,
# instead of `!cp` failing silently and the training cells later failing on
# --pretrain_model_path.
import os
import shutil

checkpoint_dir = 'checkpoints/timer_xl'
checkpoint_path = f'{checkpoint_dir}/checkpoint.pth'
drive_checkpoint_path = '/content/drive/MyDrive/timer_xl_peru/checkpoints/checkpoint.pth'

os.makedirs(checkpoint_dir, exist_ok=True)

if not os.path.exists(drive_checkpoint_path):
    raise FileNotFoundError(
        f"Pre-trained checkpoint not found at {drive_checkpoint_path}. "
        "Mount Google Drive and check the path before training."
    )

shutil.copy(drive_checkpoint_path, checkpoint_path)
print(f"Checkpoint ready: {checkpoint_path} "
      f"({os.path.getsize(checkpoint_path) / (1024**2):.1f} MB)")

## Transfer Learning Classification

In [None]:
!python run.py \
  --task_name classification \
  --is_training 1 \
  --model_id peru_rainfall_timerxl_11years \
  --model timer_xl_classifier \
  --data PeruRainfall \
  --root_path datasets/processed/ \
  --data_path peru_rainfall_cleaned.csv \
  --checkpoints checkpoints/ \
  --seq_len 1440 \
  --input_token_len 96 \
  --output_token_len 96 \
  --test_seq_len 1440 \
  --test_pred_len 2 \
  --e_layers 8 \
  --d_model 1024 \
  --d_ff 2048 \
  --n_heads 8 \
  --dropout 0.2 \
  --activation relu \
  --batch_size 16 \
  --learning_rate 5e-5 \
  --train_epochs 30 \
  --patience 8 \
  --n_classes 2 \
  --gpu 0 \
  --cosine \
  --tmax 30 \
  --adaptation \
  --pretrain_model_path checkpoints/timer_xl/checkpoint.pth \
  --use_focal_loss \
  --loss CE \
  --itr 1 \
  --des 'Peru_Rainfall_Transfer_Learning_11Years_2014_2024'

print("\n‚úÖ Training complete!")
print("üìä Results saved to: checkpoints/peru_rainfall_timerxl_11years/")

## 🔬 Smaller Model

In [None]:

!python run.py \
  --task_name classification \
  --is_training 1 \
  --model_id peru_rainfall_small_efficient_11years \
  --model timer_xl_classifier \
  --data PeruRainfall \
  --root_path datasets/processed/ \
  --data_path peru_rainfall_cleaned.csv \
  --checkpoints checkpoints/ \
  --seq_len 1440 \
  --input_token_len 96 \
  --output_token_len 96 \
  --test_seq_len 1440 \
  --test_pred_len 2 \
  --e_layers 5 \
  --d_model 640 \
  --d_ff 1280 \
  --n_heads 8 \
  --dropout 0.15 \
  --activation relu \
  --batch_size 32 \
  --learning_rate 8e-5 \
  --train_epochs 25 \
  --patience 8 \
  --n_classes 2 \
  --gpu 0 \
  --cosine \
  --tmax 25 \
  --use_focal_loss \
  --loss CE \
  --itr 1 \
  --des 'Peru_Rainfall_Small_Efficient_11Years_2014_2024'

print("\n‚úÖ Training complete!")
print("üìä Results saved to: checkpoints/peru_rainfall_small_improved_11years/")

## Focal Loss + Regularización Optimizada


In [None]:

# Focal-loss run ("V2") on the small configuration (5 layers, d_model=640)
# with explicit --focal_alpha / --focal_gamma values.
# NOTE: glob and os are imported here but not used in this cell.
import glob
import os

# NOTE(review): both --use_focal_loss and --loss CE are passed; which one takes
# effect is decided inside run.py — confirm.
!python run.py \
    --task_name classification \
    --is_training 1 \
    --model_id peru_rainfall_focal_optimized_v2 \
    --model timer_xl_classifier \
    --data PeruRainfall \
    --root_path datasets/processed/ \
    --data_path peru_rainfall_cleaned.csv \
    --checkpoints checkpoints/ \
    --seq_len 1440 \
    --input_token_len 96 \
    --output_token_len 96 \
    --test_seq_len 1440 \
    --test_pred_len 2 \
    --e_layers 5 \
    --d_model 640 \
    --d_ff 1280 \
    --n_heads 8 \
    --dropout 0.20 \
    --activation relu \
    --batch_size 40 \
    --learning_rate 6e-5 \
    --train_epochs 15 \
    --patience 4 \
    --n_classes 2 \
    --gpu 0 \
    --cosine \
    --tmax 15 \
    --use_focal_loss \
    --focal_alpha 0.70 \
    --focal_gamma 2.8 \
    --loss CE \
    --itr 1 \
    --des 'Peru_Focal_Optimized_V2_F182'

# NOTE(review): printed unconditionally; a failing `!` command does not stop the cell.
print("\n" + "="*80)
print("‚úÖ Entrenamiento completo!")

## Focal Loss Avanzado + Class Weights Dinámicos


In [None]:
# STRATEGY 1: more aggressive focal loss + tuned regularization.
# Stated target: F1 > 85% with No-Rain recall > 75%.
# Same architecture as the V2 focal run; only the loss and optimisation
# hyper-parameters change.

!python run.py \
    --task_name classification \
    --is_training 1 \
    --model_id peru_rainfall_focal_aggressive_v3 \
    --model timer_xl_classifier \
    --data PeruRainfall \
    --root_path datasets/processed/ \
    --data_path peru_rainfall_cleaned.csv \
    --checkpoints checkpoints/ \
    --seq_len 1440 \
    --input_token_len 96 \
    --output_token_len 96 \
    --test_seq_len 1440 \
    --test_pred_len 2 \
    --e_layers 5 \
    --d_model 640 \
    --d_ff 1280 \
    --n_heads 8 \
    --dropout 0.22 \
    --activation relu \
    --batch_size 40 \
    --learning_rate 7e-5 \
    --train_epochs 20 \
    --patience 5 \
    --n_classes 2 \
    --gpu 0 \
    --cosine \
    --tmax 20 \
    --use_focal_loss \
    --focal_alpha 0.75 \
    --focal_gamma 3.2 \
    --loss CE \
    --itr 1 \
    --des 'Peru_Focal_Aggressive_V3_Target_F185'

# Hard-coded summary of the changes relative to the V2 cell. It is printed
# unconditionally and does not mention that train_epochs/tmax also changed
# (15 -> 20).
print("\n" + "="*80)
print("‚úÖ ESTRATEGIA 1 completada!")
print("üìä CAMBIOS vs V2:")
print("   ‚Ä¢ focal_alpha: 0.70 ‚Üí 0.75 (+7% peso clase No Rain)")
print("   ‚Ä¢ focal_gamma: 2.8 ‚Üí 3.2 (+14% penalizaci√≥n ejemplos f√°ciles)")
print("   ‚Ä¢ learning_rate: 6e-5 ‚Üí 7e-5 (+17% velocidad convergencia)")
print("   ‚Ä¢ dropout: 0.20 ‚Üí 0.22 (+10% regularizaci√≥n)")
print("   ‚Ä¢ patience: 4 ‚Üí 5 (m√°s tolerancia)")
print("="*80)

## Arquitectura Más Profunda

In [None]:
# STRATEGY 2: deeper model (7 layers, d_model=768, d_ff=1536).
# Stated goal: better capture of complex temporal ENSO patterns.

!python run.py \
    --task_name classification \
    --is_training 1 \
    --model_id peru_rainfall_deep_model_v1 \
    --model timer_xl_classifier \
    --data PeruRainfall \
    --root_path datasets/processed/ \
    --data_path peru_rainfall_cleaned.csv \
    --checkpoints checkpoints/ \
    --seq_len 1440 \
    --input_token_len 96 \
    --output_token_len 96 \
    --test_seq_len 1440 \
    --test_pred_len 2 \
    --e_layers 7 \
    --d_model 768 \
    --d_ff 1536 \
    --n_heads 8 \
    --dropout 0.18 \
    --activation relu \
    --batch_size 32 \
    --learning_rate 5e-5 \
    --train_epochs 18 \
    --patience 5 \
    --n_classes 2 \
    --gpu 0 \
    --cosine \
    --tmax 18 \
    --use_focal_loss \
    --focal_alpha 0.72 \
    --focal_gamma 2.9 \
    --loss CE \
    --itr 1 \
    --des 'Peru_Deep_Model_7Layers_768Dim'

# Hard-coded summary, printed unconditionally.
# NOTE(review): the parameter counts (~8M -> ~14M) are typed in, not computed
# from the model — confirm.
print("\n" + "="*80)
print("‚úÖ ESTRATEGIA 2 completada!")
print("üß† ARQUITECTURA PROFUNDA:")
print("   ‚Ä¢ Layers: 5 ‚Üí 7 (+40%)")
print("   ‚Ä¢ d_model: 640 ‚Üí 768 (+20%)")
print("   ‚Ä¢ d_ff: 1280 ‚Üí 1536 (+20%)")
print("   ‚Ä¢ Par√°metros totales: ~8M ‚Üí ~14M")
print("="*80)

---

# 🌊 REGRESIÓN (Rainfall Forecasting)

---

### ¿Qué es un "buen" resultado en Rainfall Regression?

Basado en literatura (ERA5, IMERG precipitation):

| Métrica | Excelente | Bueno | Aceptable |
|---------|-----------|-------|-----------|
| **RMSE** | < 2.0 mm | < 3.5 mm | < 5.0 mm |
| **MAE** | < 1.5 mm | < 2.5 mm | < 3.5 mm |
| **R² Score** | > 0.70 | > 0.55 | > 0.40 |

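## Preparar Datos desde Archivos .nc (DESDE CERO)
**Script**: `preprocess_era5_regression.py` (procesa .nc → CSV regresión)

> **Orden de ejecución**: la celda siguiente entrena con `peru_rainfall_regression_cleaned.csv`. Ese archivo lo genera la celda de la sección "Preparación de Datos para Regresión" (más abajo); ejecútala primero.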

In [None]:
# üìä REGRESI√ìN v1: Transfer Learning Directo (Forecasting)
# Usa checkpoint pre-entrenado en su tarea original (regresi√≥n)
# ‚ö†Ô∏è USA EL CSV LIMPIO (peru_rainfall_regression_cleaned.csv)

!python run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --model_id peru_rainfall_regression_baseline \
    --model timer_xl \
    --data PeruRainfall \
    --root_path datasets/processed/ \
    --data_path peru_rainfall_regression_cleaned.csv \
    --checkpoints checkpoints/ \
    --seq_len 1440 \
    --label_len 720 \
    --pred_len 24 \
    --input_token_len 96 \
    --output_token_len 96 \
    --e_layers 5 \
    --d_model 640 \
    --d_ff 1280 \
    --n_heads 8 \
    --dropout 0.15 \
    --activation relu \
    --batch_size 32 \
    --learning_rate 5e-5 \
    --train_epochs 20 \
    --patience 6 \
    --gpu 0 \
    --cosine \
    --tmax 20 \
    --adaptation \
    --pretrain_model_path checkpoints/timer_xl/checkpoint.pth \
    --loss MSE \
    --itr 1 \
    --des 'Peru_Rainfall_Regression_24h_Forecast'

print("\n" + "="*80)
print("‚úÖ REGRESI√ìN completada!")
print("="*80)

## Preparación de Datos para Regresión

In [None]:
# STEP 1: build the REGRESSION dataset directly from the .nc files (from scratch).
# Per the messages printed below, peru_rainfall_cleaned.csv is NOT used here
# because it holds binarized data; the .nc files are processed into continuous
# values (mm) instead.

print("="*80)
print("üåä PASO 1/2: PREPARANDO DATOS DE REGRESI√ìN DESDE ARCHIVOS .NC")
print("="*80)
print("\n‚ö†Ô∏è  DIFERENCIAS vs CLASIFICACI√ìN:")
print("   ‚ùå NO usa peru_rainfall_cleaned.csv (datos binarizados)")
print("   ‚úÖ Procesa directamente archivos .nc")
print("   ‚úÖ Convierte METROS ‚Üí MIL√çMETROS (√ó1000)")
print("   ‚úÖ Target: 'target_precip_24h' (continuo, NO binario)")
print("\n" + "="*80 + "\n")

!python preprocessing/preprocess_era5_regression.py \
    --input_dir datasets/raw_era5 \
    --output_dir datasets/processed \
    --years 2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024 \
    --target_horizon 24

# NOTE(review): printed unconditionally; a failing `!` command does not stop the cell.
print("\n‚úÖ Datos RAW de regresi√≥n generados!")
print("üìä Verificar estad√≠sticas:")

# Dump the statistics file expected from the preprocessing step.
import json
with open('datasets/processed/regression_stats.json') as f:
    stats = json.load(f)
    print(json.dumps(stats, indent=2))

# STEP 2: clean the REGRESSION data (NaN handling, outliers).
# Described below as different from the classification cleaning (more
# aggressive with NaN); the actual behaviour lives in clean_regression_data.py,
# which is not visible here.

print("\n" + "="*80)
print("üßπ PASO 2/2: LIMPIANDO DATOS DE REGRESI√ìN")
print("="*80)
print("\n‚ö†Ô∏è  DIFERENCIAS vs LIMPIEZA DE CLASIFICACI√ìN:")
print("   ‚úÖ Interpolaci√≥n temporal (mejor para series continuas)")
print("   ‚úÖ Detecci√≥n de outliers (valores extremos)")
print("   ‚úÖ Validaci√≥n de valores continuos (no binarios)")
print("   ‚úÖ Remoci√≥n agresiva de NaN (regresi√≥n es sensible)")
print("\n" + "="*80 + "\n")

# NOTE(review): --max_precip 200.0 is presumably an upper bound in mm — confirm in the script.
!python preprocessing/clean_regression_data.py \
    --input_path datasets/processed/peru_rainfall_regression.csv \
    --output_path datasets/processed/peru_rainfall_regression_cleaned.csv \
    --max_precip 200.0

print("\n‚úÖ Datos de regresi√≥n LIMPIOS y listos para entrenamiento!")
print("\nüìä VERIFICACI√ìN FINAL:")

# Check that the cleaned target is continuous (NOT binary).
import pandas as pd
df_clean = pd.read_csv('datasets/processed/peru_rainfall_regression_cleaned.csv')

# Note: "Shape" and "NaN count" below describe the whole frame, not only the target column.
print(f"\nüéØ Target variable (target_precip_24h):")
print(f"   Shape: {df_clean.shape}")
print(f"   Unique values: {df_clean['target_precip_24h'].nunique()}")
print(f"   Range: [{df_clean['target_precip_24h'].min():.3f}, {df_clean['target_precip_24h'].max():.3f}] mm")
print(f"   Mean: {df_clean['target_precip_24h'].mean():.3f} mm")
print(f"   NaN count: {df_clean.isnull().sum().sum()}")

# Critical validation: fewer than 100 distinct target values is treated as
# "binary, not continuous". This only prints a message; it does not raise.
unique_count = df_clean['target_precip_24h'].nunique()
if unique_count < 100:
    print(f"\n‚ùå ERROR: Solo {unique_count} valores √∫nicos (datos binarios, no continuos)")
else:
    print(f"\n‚úÖ CORRECTO: {unique_count} valores √∫nicos (datos continuos)")
    print("‚úÖ Listo para entrenamiento de regresi√≥n")

## Clasificación vs Regresión

In [None]:
# Side-by-side summary of the classification and regression approaches.
# Every value in the table is hand-entered text (nothing is read from training
# outputs); the PENDING entries are meant to be filled in after training.

import pandas as pd
import numpy as np

RULE = "="*80

print(RULE)
print("üìä COMPARACI√ìN DE ENFOQUES")
print(RULE)

# One tuple per table row: (metric, classification value, regression value).
_comparison_rows = [
    ('Tiempo entrenamiento', '~1.5 horas', '~1.8 horas'),
    ('F1-Score / RMSE', '83.24%', 'PENDING (ejecutar)'),
    ('Recall / MAE', 'Rain: 83% | No Rain: 71%', 'PENDING (ejecutar)'),
    ('Precision / R¬≤', 'Rain: 83% | No Rain: 71%', 'PENDING (ejecutar)'),
    ('Interpretabilidad', 'Alta (binario)', 'Muy alta (mm/d√≠a)'),
    ('Transfer Learning', 'Regular (cambio de dominio)', 'Excelente (mismo dominio)'),
    ('Comparabilidad papers', 'Dif√≠cil (pocas referencias)', 'F√°cil (benchmarks ERA5)'),
]
_metric_col, _classification_col, _regression_col = (
    list(column) for column in zip(*_comparison_rows)
)

# Column-oriented mapping, in display order.
comparison = {
    'M√©trica': _metric_col,
    'Clasificaci√≥n (V2)': _classification_col,
    'Regresi√≥n (Baseline)': _regression_col,
}

df_comp = pd.DataFrame(comparison)
print("\n" + df_comp.to_string(index=False))

print("\n" + RULE)
print("üí° RECOMENDACI√ìN PARA TESIS:")
print(RULE)

_recommendation = """
1. **Enfoque Principal: REGRESI√ìN**
   - Mejor aprovecha pre-training de Timer-XL
   - M√©tricas comparables con literatura (RMSE/MAE)
   - Preserva informaci√≥n de intensidad (√∫til para ENSO extremos)

2. **Enfoque Secundario: CLASIFICACI√ìN**
   - √ötil para aplicaciones pr√°cticas (alertas tempranas)
   - Complementa an√°lisis de regresi√≥n
   - Puede convertir predicciones regresi√≥n ‚Üí clasificaci√≥n (umbral)

3. **Estructura de Tesis**:
   Cap√≠tulo 4: Resultados
   - 4.1 Rainfall Forecasting (Regresi√≥n) - PRINCIPAL
   - 4.2 Rain Detection (Clasificaci√≥n) - COMPLEMENTARIO
   - 4.3 ENSO-aware Analysis (ambos enfoques)
   - 4.4 Regional Analysis (ambos enfoques)

‚úÖ **VENTAJA**: Dos l√≠neas de evaluaci√≥n hacen tu tesis m√°s robusta.
"""
print(_recommendation)

# Checklist of follow-up actions, printed one per line.
print("\nüìå PR√ìXIMOS PASOS:")
for _step in (
    "   1. Ejecutar celda de preparaci√≥n de datos regresi√≥n",
    "   2. Ejecutar celda REGRESI√ìN v1",
    "   3. Comparar RMSE regresi√≥n vs F1 clasificaci√≥n",
    "   4. Elegir enfoque principal para tesis seg√∫n resultados",
):
    print(_step)

## 🌊 FASE 2: Validación ENSO-aware (Core de tu Tesis)

**Hipótesis a validar**:
1. **H1**: F1 > 0.75 en TODAS las fases (El Niño, La Niña, Neutral)
2. **H2**: |F1_ElNiño - F1_LaNiña| < 0.15 (consistencia)
3. **H3**: F1_ElNiño ≥ F1_Neutral AND F1_LaNiña ≥ F1_Neutral

In [None]:

import glob
import os

print("="*80)
print("üåä FASE 2: VALIDACI√ìN ENSO-AWARE")
print("="*80)

# Buscar mejor checkpoint de FASE 1
checkpoint_pattern = "checkpoints/classification_peru_rainfall_focal_rescue_v1_*/checkpoint.pth"
checkpoints = glob.glob(checkpoint_pattern)

if not checkpoints:
    print("\n‚ùå ERROR: No se encontr√≥ checkpoint de FASE 1")
    print("   Ejecuta primero la celda de FASE 1 (Rescate del Modelo)")
    print("   Debe generar un checkpoint con F1 > 0.80\n")
else:
    checkpoints.sort(key=os.path.getmtime, reverse=True)
    CHECKPOINT_PATH = checkpoints[0]
    CHECKPOINT_DIR = os.path.dirname(CHECKPOINT_PATH)
    
    print(f"\n‚úÖ Checkpoint encontrado: {CHECKPOINT_DIR}")
    
    # Nota: validate_enso_phases.py requiere integraci√≥n con tu pipeline
    # Por ahora, ejecuta el test normal y guarda predicciones
    
    print("\n? PASO 1: Generar predicciones del modelo...")
    print("   (Debes ejecutar el test y guardar predicciones con timestamps)")
    
    # Ejecutar test y guardar predicciones
    !python test_checkpoint_standalone.py \
        --checkpoint_path $CHECKPOINT_PATH \
        --save_predictions \
        --output_dir results/enso_validation
    
    print("\nüìä PASO 2: Ejecutar an√°lisis ENSO-aware...")
    
    # ‚ö†Ô∏è REQUIERE ADAPTACI√ìN: validate_enso_phases.py necesita acceso a predicciones
    # Por ahora, placeholder - debes integrar con tu pipeline
    
    print("\nüí° SIGUIENTE PASO:")
    print("   1. Revisa el archivo de predicciones generado")
    print("   2. A√±ade columna 'enso_phase' al CSV de predicciones")
    print("   3. Ejecuta: !python validate_enso_phases.py \\")
    print("              --data_path results/enso_validation/predictions_with_phases.csv \\")
    print("              --output_dir results/enso_validation")
    
    print("\nüìä M√âTRICAS ESPERADAS:")
    print("   ‚úÖ F1 El Ni√±o > 0.75")
    print("   ‚úÖ F1 La Ni√±a > 0.75")
    print("   ‚úÖ F1 Neutral > 0.75")
    print("   ‚úÖ |F1_ElNi√±o - F1_LaNi√±a| < 0.15")
    
    print("\nüìÅ Resultados se guardar√°n en: results/enso_validation/")
    print("   - enso_f1_comparison.png")
    print("   - enso_confusion_matrices.png")
    print("   - enso_validation_report.txt")

## 🗺️ FASE 3: Análisis Regional (Costa Norte vs Centro vs Sur)

**Objetivo**: Validar gradiente de influencia ENSO.

**Hipótesis a validar**:
1. **H4**: F1_Norte > F1_Centro > F1_Sur (gradiente ENSO)
2. **H5**: Rain_prevalence_Norte > Rain_prevalence_Sur

**Requisito previo**: Haber completado FASE 1 y FASE 2

In [None]:
# PHASE 3: run the regional analysis.
# REQUIRES: prediction data with geographic coordinates (latitude, longitude).
# If the predictions CSV is missing, the cell only prints instructions for
# building it; otherwise it runs validate_regional.py on that file.
# NOTE: glob is imported here but not used in this cell.

import glob
import os

print("="*80)
print("üó∫Ô∏è FASE 3: AN√ÅLISIS REGIONAL")
print("="*80)

# Check that a predictions file with coordinates exists.
predictions_file = "results/enso_validation/predictions_with_coords.csv"

if not os.path.exists(predictions_file):
    print("\n‚ö†Ô∏è NOTA: Se requiere CSV con predicciones + coordenadas")
    print("   Columnas necesarias:")
    print("   - timestamp")
    print("   - latitude (para asignar regi√≥n)")
    print("   - rain_24h (label verdadero)")
    print("   - pred_label (predicci√≥n del modelo)")
    print("   - pred_proba_rain (probabilidad clase Rain)")

    print("\nüí° CREAR CSV:")
    print("   1. Cargar datos originales (peru_rainfall_cleaned.csv)")
    print("   2. A√±adir columnas de predicci√≥n del modelo")
    print("   3. Guardar como predictions_with_coords.csv")

    print("\nüìä REGIONES (basado en latitud):")
    print("   - Costa Norte (-8¬∞ a -4¬∞): Piura, Tumbes, Lambayeque")
    print("   - Costa Centro (-14¬∞ a -8¬∞): Lima, Callao, Ica")
    print("   - Costa Sur (-18¬∞ a -14¬∞): Arequipa, Moquegua, Tacna")
else:
    print(f"\n‚úÖ Archivo de predicciones encontrado: {predictions_file}")

    print("\nüìä Ejecutando an√°lisis regional...")

    # $predictions_file is expanded by IPython to the Python variable's value.
    !python validate_regional.py \
        --data_path $predictions_file \
        --output_dir results/regional_analysis

    # NOTE(review): printed unconditionally once the command returns; its exit
    # status is not checked.
    print("\n" + "="*80)
    print("‚úÖ AN√ÅLISIS REGIONAL COMPLETADO")
    print("="*80)

    print("\nüìä VERIFICAR HIP√ìTESIS:")
    print("   ‚úÖ H4: ¬øF1_Norte > F1_Centro > F1_Sur?")
    print("   ‚úÖ H5: ¬øRain_prevalence_Norte > Rain_prevalence_Sur?")

    print("\nüìÅ Resultados guardados en: results/regional_analysis/")
    print("   - regional_comparison.png")
    print("   - regional_confusion_matrices.png")
    print("   - regional_analysis_report.txt")

    print("\nüí° INTERPRETACI√ìN:")
    print("   Si H4 se cumple ‚Üí Timer-XL captura gradiente ENSO ‚úÖ")
    print("   Si H4 NO se cumple ‚Üí Requiere features ENSO expl√≠citos ‚ö†Ô∏è")

## TEST ANY CHECKPOINT

In [None]:
# Run the standalone test script with --find_latest.
# NOTE(review): presumably this selects the most recent checkpoint on its own;
# the selection logic lives in test_checkpoint_standalone.py — confirm there.
!python test_checkpoint_standalone.py --find_latest

# Usage hint for testing one specific checkpoint instead.
print("\n" + "="*80)
print("Para testear un checkpoint espec√≠fico, usa:")
print("   !python test_checkpoint_standalone.py --checkpoint_path 'ruta/al/checkpoint.pth'")
print("="*80)

## 💾 GUARDAR CHECKPOINTS ANTES DE DESCONECTAR

In [None]:
# Automatic backup of trained checkpoints to Google Drive.
# Run this cell BEFORE disconnecting Colab so that no progress is lost.
#
# Each run directory is backed up exactly once. The earlier pattern list
# overlapped ('..._timerxl_11years_*' is a subset of '..._timerxl_*'), so the
# same checkpoint was copied and counted twice, and the focal / deep /
# regression runs were not matched at all.

import shutil
import os
import glob
from datetime import datetime

print("="*80)
print("üíæ GUARDANDO CHECKPOINTS A GOOGLE DRIVE")
print("="*80 + "\n")

# Destination directory on Drive.
drive_backup = '/content/drive/MyDrive/timer_xl_peru/checkpoints_backup/'
os.makedirs(drive_backup, exist_ok=True)

# Timestamp that identifies this backup.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Run directories to back up. The pre-trained checkpoints/timer_xl/ copy is
# deliberately not matched (it already lives on Drive).
checkpoint_patterns = [
    # Every classification run trained in this notebook.
    'checkpoints/classification_peru_rainfall_*/',
    # Regression runs. NOTE(review): assumes run.py prefixes the directory
    # with the task name, as it does for classification — confirm. A pattern
    # that matches nothing is harmless.
    'checkpoints/long_term_forecast_peru_rainfall_*/',
]

# Collapse overlapping matches into a sorted list of unique directories.
run_dirs = sorted({
    os.path.normpath(matched_dir)
    for pattern in checkpoint_patterns
    for matched_dir in glob.glob(pattern)
})

saved_models = []
failed_models = []

for run_dir in run_dirs:
    source_path = os.path.join(run_dir, 'checkpoint.pth')
    if not os.path.exists(source_path):
        continue

    # Descriptive name for the backup file.
    model_name = os.path.basename(run_dir)
    backup_name = f"{model_name}_{timestamp}.pth"
    backup_path = os.path.join(drive_backup, backup_name)

    print(f"üì¶ Guardando: {model_name}")
    print(f"   Origen: {source_path}")
    print(f"   Destino: {backup_path}")

    try:
        shutil.copy2(source_path, backup_path)

        # Size of the copied file, read back from the destination.
        size_mb = os.path.getsize(backup_path) / (1024**2)
        print(f"   ‚úÖ Guardado exitoso ({size_mb:.1f} MB)\n")

        saved_models.append({
            'name': model_name,
            'path': backup_path,
            'size_mb': size_mb
        })

    except OSError as e:
        # Keep going with the remaining checkpoints, but remember the failure
        # so the final banner does not claim it is safe to disconnect.
        print(f"   ‚ùå Error: {e}\n")
        failed_models.append(model_name)

# Final summary.
print("="*80)
print("üìä RESUMEN DEL BACKUP")
print("="*80)

if saved_models:
    print(f"\n‚úÖ {len(saved_models)} checkpoint(s) guardado(s):\n")

    total_size = 0
    for model in saved_models:
        print(f"   ‚Ä¢ {model['name']}")
        print(f"     Tama√±o: {model['size_mb']:.1f} MB")
        print(f"     Ubicaci√≥n: {model['path']}\n")
        total_size += model['size_mb']

    print(f"üíæ Tama√±o total: {total_size:.1f} MB")
    print(f"üìÅ Directorio: {drive_backup}")

    # Also write a small metadata file next to the backups.
    metadata_path = os.path.join(drive_backup, f'backup_metadata_{timestamp}.txt')
    with open(metadata_path, 'w', encoding='utf-8') as f:
        f.write(f"Backup realizado: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Total checkpoints: {len(saved_models)}\n")
        f.write(f"Tama√±o total: {total_size:.1f} MB\n\n")
        f.write("Checkpoints guardados:\n")
        for model in saved_models:
            f.write(f"  - {model['name']} ({model['size_mb']:.1f} MB)\n")

    print(f"\nüìÑ Metadata guardada: {metadata_path}")

else:
    print("\n‚ö†Ô∏è No se encontraron checkpoints para guardar.")

print("\n" + "="*80)
if saved_models and not failed_models:
    print("üéâ BACKUP COMPLETADO - Ya puedes desconectar Colab de forma segura")
else:
    # Nothing was saved, or at least one copy failed: do not claim it is safe.
    print("BACKUP INCOMPLETO - Revisa los mensajes anteriores antes de desconectar Colab")
    for failed_name in failed_models:
        print(f"   - {failed_name}")
print("="*80)