# 🔄 GRU - Predicción de Series Temporales y Comparación con LSTM
## Gated Recurrent Unit: Alternativa Eficiente a LSTM

---

### Objetivos:
1. Implementar modelo GRU con arquitectura similar a LSTM
2. Entrenar y evaluar el modelo
3. Comparar rendimiento con LSTM (accuracy, tiempo, parámetros)
4. Determinar cuál modelo es más eficiente para este problema

**Autor**: Adonnay Bazaldua  
**Fecha**: Noviembre 2025

## Teoría: GRU vs LSTM

### LSTM (Long Short-Term Memory)
- **Compuertas**: 3 (Input, Forget, Output)
- **Estados**: Cell state + Hidden state
- **Parámetros**: Más pesado
- **Ventaja**: Mejor para secuencias muy largas

### GRU (Gated Recurrent Unit)
- **Compuertas**: 2 (Reset, Update)
- **Estados**: Solo Hidden state
- **Parámetros**: ~25% menos que LSTM
- **Ventaja**: Más rápido, menos propenso a overfitting

**Pregunta clave**: ¿El trade-off de eficiencia vale la pena para nuestro caso?

In [None]:
# Importaciones (igual que LSTM)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import time
import warnings
# Suppress every warning for the rest of the session to keep cell output clean.
# NOTE(review): this also hides deprecation and numerical (e.g. divide-by-zero)
# warnings — confirm that is intended.
warnings.filterwarnings('ignore')

# Seed the NumPy and TensorFlow random generators with the same fixed value so
# repeated runs start from the same random state for these two libraries.
np.random.seed(42)
tf.random.set_seed(42)

# Report the TensorFlow version in use.
print(f"‚úÖ TensorFlow version: {tf.__version__}")

In [None]:
# Load the preprocessed daily series (the original notes that the same data is
# used for the LSTM so the comparison is fair).
print("üìÇ Cargando datos de series temporales...\n")

# Read the CSV, parse 'fecha' as datetimes, order chronologically and renumber
# the rows from zero.
df_timeseries = (
    pd.read_csv('processed_data/timeseries_data.csv')
    .assign(fecha=lambda frame: pd.to_datetime(frame['fecha']))
    .sort_values(by='fecha')
    .reset_index(drop=True)
)

print(f"‚úÖ Datos cargados: {len(df_timeseries)} d√≠as")

In [None]:
# Sliding-window sequence builder (reusable helper)
def create_sequences(data, seq_length):
    """Split a series into overlapping input windows and next-step targets.

    Args:
        data: Sliceable sequence of observations (e.g. a 2-D array with one
            row per time step).
        seq_length: Number of consecutive observations in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X[i]`` is ``data[i:i + seq_length]``
        and ``y[i]`` is the observation right after that window,
        ``data[i + seq_length]``. Both arrays are empty when ``data`` has no
        more than ``seq_length`` items.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in window_starts]
    targets = [data[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(targets)

# Prepare data: single-feature column vector of daily totals
data = df_timeseries['total_delitos'].values.reshape(-1, 1)

SEQ_LENGTH = 30

# 70/15/15 split sizes, computed BEFORE scaling so the scaler can be fitted on
# the training span only. create_sequences yields len(data) - SEQ_LENGTH
# windows, so these match int(0.70 * len(X_seq)) / int(0.15 * len(X_seq)).
n_sequences = max(len(data) - SEQ_LENGTH, 0)
train_size = int(0.70 * n_sequences)
val_size = int(0.15 * n_sequences)

# Fit the scaler on the raw observations that the training windows and targets
# actually touch. The last training target is data[train_size - 1 + SEQ_LENGTH],
# hence the slice bound. Fitting on the whole series (as before) leaked the
# validation/test min and max into training.
# Consequence: scaled validation/test values may fall slightly outside [0, 1].
# NOTE(review): if the LSTM notebook still fits its scaler on the full series,
# apply the same change there to keep the comparison like-for-like.
scaler_gru = MinMaxScaler(feature_range=(0, 1))
scaler_gru.fit(data[:train_size + SEQ_LENGTH])
data_scaled = scaler_gru.transform(data)

X_seq, y_seq = create_sequences(data_scaled, SEQ_LENGTH)

# Chronological split: train first, then validation, remainder is test
X_train_gru = X_seq[:train_size]
y_train_gru = y_seq[:train_size]
X_val_gru = X_seq[train_size:train_size + val_size]
y_val_gru = y_seq[train_size:train_size + val_size]
X_test_gru = X_seq[train_size + val_size:]
y_test_gru = y_seq[train_size + val_size:]

print(f"‚úÖ Secuencias creadas: {X_seq.shape}")
print(f"   Train: {X_train_gru.shape[0]} | Val: {X_val_gru.shape[0]} | Test: {X_test_gru.shape[0]}")

## Construcción del Modelo GRU

### Arquitectura (paralela a LSTM):
```
Input(30, 1)
  → GRU(128, return_sequences=True) → Dropout(0.2)
  → GRU(64) → Dropout(0.2)
  → Dense(32, activation='relu')
  → Dense(1, activation='linear')
```

In [None]:
def create_gru_model(seq_length, n_features, learning_rate=0.001):
    """Build and compile the two-layer GRU regressor.

    Args:
        seq_length: Number of time steps in each input window.
        n_features: Number of features per time step.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled Keras ``Sequential`` model named ``GRU_Crime_Predictor``
        with MSE loss and MAE/MSE metrics.
    """
    layer_stack = [
        layers.Input(shape=(seq_length, n_features)),
        # First GRU returns the full sequence so the second GRU can consume it
        layers.GRU(128, return_sequences=True, activation='tanh'),
        layers.Dropout(0.2),
        # Second GRU returns only its final output
        layers.GRU(64, activation='tanh'),
        layers.Dropout(0.2),
        # Dense head ending in a single linear output
        layers.Dense(32, activation='relu'),
        layers.Dense(1, activation='linear'),
    ]
    model = models.Sequential(layer_stack, name='GRU_Crime_Predictor')

    adam = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=adam, loss='mse', metrics=['mae', 'mse'])
    return model

# Build the model
print("üèóÔ∏è Construyendo modelo GRU...\n")
gru_model = create_gru_model(seq_length=SEQ_LENGTH, n_features=1)

gru_model.summary()

total_params_gru = gru_model.count_params()
print(f"\nüìä Total de par√°metros GRU: {total_params_gru:,}")

# Load the stored LSTM results to compare parameter counts.
# This stays best-effort: any ordinary failure (missing or corrupt file,
# missing key, zero parameter count) is reported and the notebook continues.
# `except Exception` replaces the former bare `except:` so KeyboardInterrupt
# and SystemExit are no longer swallowed, and the reason is now shown.
# NOTE: pickle.load executes code embedded in the file — only load result
# files produced by your own LSTM notebook.
try:
    with open('models/lstm_results.pkl', 'rb') as f:
        lstm_results = pickle.load(f)
    lstm_params = lstm_results['num_parameters']
    print(f"üìä Total de par√°metros LSTM: {lstm_params:,}")
    print(f"   Reducci√≥n: {(1 - total_params_gru/lstm_params)*100:.1f}% menos par√°metros")
except Exception as e:
    print("‚ö†Ô∏è No se encontr√≥ modelo LSTM para comparar")
    print(f"   Detalle: {type(e).__name__}: {e}")

In [None]:
# Callbacks: early stopping, learning-rate reduction and best-model checkpoint.
# NOTE(review): EarlyStopping and ReduceLROnPlateau monitor val_loss while
# ModelCheckpoint monitors val_mae, so the checkpointed epoch may differ from
# the weights restored by EarlyStopping — confirm this is intended.
callbacks_list = [
    callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
    callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=7, min_lr=1e-7, verbose=1),
    callbacks.ModelCheckpoint('models/gru_best.keras', monitor='val_mae', save_best_only=True, verbose=1)
]

# Train and measure elapsed time.
# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments the way time.time() differences can.
print("üöÄ Iniciando entrenamiento GRU...\n")
start_time = time.perf_counter()

history_gru = gru_model.fit(
    X_train_gru, y_train_gru,
    batch_size=32,
    epochs=100,
    validation_data=(X_val_gru, y_val_gru),
    callbacks=callbacks_list,
    verbose=1
)

# Elapsed training time in seconds
training_time_gru = time.perf_counter() - start_time
print(f"\n‚è±Ô∏è Tiempo de entrenamiento GRU: {training_time_gru:.2f} segundos ({training_time_gru/60:.2f} min)")

In [None]:
# Evaluate: run the trained model on the held-out test windows
print("üìä Evaluando modelo GRU...\n")

y_pred_gru = gru_model.predict(X_test_gru, verbose=0)

# De-normalize: map predictions and targets back through scaler_gru so the
# metrics below are computed in the scaler's original units. The targets are
# reshaped to a column because inverse_transform is given 2-D input here.
y_pred_gru_inv = scaler_gru.inverse_transform(y_pred_gru)
y_test_gru_inv = scaler_gru.inverse_transform(y_test_gru.reshape(-1, 1))

# Metrics
def calculate_metrics(y_true, y_pred):
    """Compute regression metrics for a set of predictions.

    Args:
        y_true: Observed values (array-like).
        y_pred: Predicted values (array-like, same number of elements).

    Returns:
        Tuple ``(mae, rmse, r2, mape)``. ``mape`` is a percentage computed
        only over entries whose true value is non-zero, because the
        percentage error is undefined for a true value of zero. It is
        ``nan`` when every true value is zero.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # Flatten both sides so a (n,) vs (n, 1) pair cannot broadcast to (n, n).
    true_flat = np.asarray(y_true, dtype=float).ravel()
    pred_flat = np.asarray(y_pred, dtype=float).ravel()

    # Skip zero targets: dividing by them would turn the mean into inf/nan.
    nonzero = true_flat != 0
    if nonzero.any():
        relative_error = (true_flat[nonzero] - pred_flat[nonzero]) / true_flat[nonzero]
        mape = np.mean(np.abs(relative_error)) * 100
    else:
        mape = float('nan')
    return mae, rmse, r2, mape

# Metrics on the de-normalized test targets and predictions.
# gru_metrics is the tuple (mae, rmse, r2, mape) returned by calculate_metrics,
# so the indices below are 0=MAE, 1=RMSE, 2=R2, 3=MAPE.
gru_metrics = calculate_metrics(y_test_gru_inv, y_pred_gru_inv)

print("üéØ Resultados GRU:")
print(f"   MAE: {gru_metrics[0]:.2f}")
print(f"   RMSE: {gru_metrics[1]:.2f}")
print(f"   R¬≤: {gru_metrics[2]:.4f}")
print(f"   MAPE: {gru_metrics[3]:.2f}%")

## Comparación Directa: GRU vs LSTM

In [None]:
# Comparison table: one row per metric, with LSTM, GRU and difference columns.
# The whole cell is best-effort: if lstm_results / lstm_params were never
# loaded (or a key is missing) the exception is caught below and reported.
try:
    comparison_df = pd.DataFrame({
        'M√©trica': ['MAE', 'RMSE', 'R¬≤', 'MAPE (%)', 'Par√°metros', 'Tiempo (min)', '√âpocas'],
        # Values read from the stored LSTM results
        'LSTM': [
            lstm_results['test_mae'],
            lstm_results['test_rmse'],
            lstm_results['test_r2'],
            lstm_results['test_mape'],
            lstm_params,
            'N/A',  # LSTM training time was not stored
            lstm_results['num_epochs_trained']
        ],
        # Values computed in this notebook; gru_metrics = (mae, rmse, r2, mape)
        'GRU': [
            gru_metrics[0],
            gru_metrics[1],
            gru_metrics[2],
            gru_metrics[3],
            total_params_gru,
            f"{training_time_gru/60:.2f}",
            len(history_gru.history['loss'])
        ],
        # Relative change of GRU vs. LSTM in percent (signed); the epoch row
        # is an absolute signed difference instead.
        'Diferencia': [
            f"{((gru_metrics[0] - lstm_results['test_mae'])/lstm_results['test_mae']*100):+.1f}%",
            f"{((gru_metrics[1] - lstm_results['test_rmse'])/lstm_results['test_rmse']*100):+.1f}%",
            f"{((gru_metrics[2] - lstm_results['test_r2'])/lstm_results['test_r2']*100):+.1f}%",
            f"{((gru_metrics[3] - lstm_results['test_mape'])/lstm_results['test_mape']*100):+.1f}%",
            f"{((total_params_gru - lstm_params)/lstm_params*100):+.1f}%",
            'N/A',
            f"{len(history_gru.history['loss']) - lstm_results['num_epochs_trained']:+d}"
        ]
    })
    
    print("\n" + "="*80)
    print(" "*30 + "LSTM vs GRU")
    print("="*80)
    print(comparison_df.to_string(index=False))
    print("="*80)
    
    # Save the comparison table
    comparison_df.to_csv('models/lstm_vs_gru_comparison.csv', index=False)
    
except Exception as e:
    # Any failure above (undefined LSTM variables, missing keys, write errors)
    # ends up here and is reported with this same message.
    print(f"‚ö†Ô∏è No se pudo comparar con LSTM: {e}")

In [None]:
# Prediction plots.
# Dates aligned with the test targets: target i of the sequence set is row
# i + SEQ_LENGTH of the frame, and the test targets start after the train and
# validation ones.
test_dates = df_timeseries['fecha'].iloc[SEQ_LENGTH+train_size+val_size:]

fig, axes = plt.subplots(2, 1, figsize=(16, 10))

# Plot 1: GRU predictions against the real values, gap shaded in gray
axes[0].plot(test_dates, y_test_gru_inv, linewidth=2, label='Real', color='steelblue')
axes[0].plot(test_dates, y_pred_gru_inv, linewidth=2, label='Predicci√≥n GRU', 
             color='coral', linestyle='--')
axes[0].fill_between(test_dates, y_test_gru_inv.flatten(), y_pred_gru_inv.flatten(), 
                      alpha=0.2, color='gray')
axes[0].set_ylabel('Total de Delitos')
axes[0].set_title(f'GRU - Predicciones (MAE: {gru_metrics[0]:.2f}, R¬≤: {gru_metrics[2]:.4f})', 
                  fontsize=12, fontweight='bold')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Plot 2: absolute errors. The GRU error is always drawn; the LSTM error is
# added only when stored LSTM test predictions can be loaded and line up.
error_gru = np.abs(y_test_gru_inv.flatten() - y_pred_gru_inv.flatten())

# Only the file load is guarded, and only against load failures. The previous
# bare `except:` wrapped the plotting too, so an error part-way through could
# redraw on a half-drawn axis and hid unrelated bugs.
try:
    lstm_preds = np.load('models/lstm_predictions_test.npy')
except (OSError, ValueError, EOFError):
    lstm_preds = None

# Require one LSTM prediction per test point; otherwise fall back to GRU only
# rather than letting NumPy broadcast or raise.
if lstm_preds is not None and lstm_preds.size != error_gru.size:
    lstm_preds = None

if lstm_preds is not None:
    # NOTE(review): assumes the stored LSTM predictions are already in the
    # original (de-normalized) units — confirm against the LSTM notebook.
    error_lstm = np.abs(y_test_gru_inv.flatten() - lstm_preds.flatten())
    axes[1].plot(test_dates, error_lstm, label='Error LSTM', linewidth=2, alpha=0.7)
    axes[1].plot(test_dates, error_gru, label='Error GRU', linewidth=2, alpha=0.7)
    axes[1].set_title('Comparaci√≥n de Errores: LSTM vs GRU', fontsize=12, fontweight='bold')
else:
    # GRU errors only
    axes[1].plot(test_dates, error_gru, label='Error GRU', linewidth=2, color='coral')
    axes[1].set_title('Error de Predicci√≥n GRU', fontsize=12, fontweight='bold')

# Axis formatting shared by both variants of plot 2
axes[1].set_xlabel('Fecha')
axes[1].set_ylabel('Error Absoluto')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('models/gru_predictions_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Persist the trained model and its results
print("üíæ Guardando modelo GRU...\n")

gru_model.save('models/gru_predictor_final.keras')

# Pickle the fitted scaler first, then the per-epoch training history
for _pickle_path, _payload in (
    ('models/gru_scaler.pkl', scaler_gru),
    ('models/gru_history.pkl', history_gru.history),
):
    with open(_pickle_path, 'wb') as f:
        pickle.dump(_payload, f)

# Summary record: test metrics (gru_metrics = mae, rmse, r2, mape), model size,
# training duration and number of epochs actually run
results_gru = dict(
    test_mae=gru_metrics[0],
    test_rmse=gru_metrics[1],
    test_r2=gru_metrics[2],
    test_mape=gru_metrics[3],
    num_parameters=total_params_gru,
    training_time_seconds=training_time_gru,
    num_epochs_trained=len(history_gru.history['loss']),
)

with open('models/gru_results.pkl', 'wb') as f:
    pickle.dump(results_gru, f)

print("‚úÖ Modelo GRU guardado")
print("\nüìù Pr√≥ximo paso: CNN para an√°lisis espacial")
print("   ‚Üí Notebook: 05_CNN_Spatial.ipynb")