# Enhanced GRU Hyperparameter Optimization for VIX Forecasting

This notebook implements enhanced GRU optimization with:
- Time series cross-validation
- Statistical significance testing
- Expanded search spaces
- Confidence intervals
- Baseline model comparisons

## Block 1: Import Libraries and Setup

In [None]:
# Import shared utilities
from vix_research_utils import *

# Deep learning imports
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Dense, Dropout, GRU, Input, MultiHeadAttention, LayerNormalization,
    Bidirectional, BatchNormalization, GlobalAveragePooling1D, Add
)
from tensorflow.keras.optimizers import Adam, AdamW
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Hyperparameter optimization
import optuna
from optuna.integration import TFKerasPruningCallback
from optuna.trial import TrialState

# Additional imports
import time
import json
from collections import defaultdict

# Seed the NumPy and TensorFlow global RNGs with the same fixed value, and
# raise Optuna's log threshold to WARNING.
np.random.seed(42)
tf.random.set_seed(42)
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Environment report, emitted as one write; the printed text is unchanged.
print("\n".join([
    "Enhanced GRU optimization setup completed",
    f"TensorFlow version: {tf.__version__}",
    f"Optuna version: {optuna.__version__}",
    f"GPU Available: {bool(tf.config.list_physical_devices('GPU'))}",
]))

## Block 2: Data Preparation

In [None]:
# Run the shared-utility pipeline: download, index-aligned merge of the two
# series, cleaning, technical features, then feature optimization.
print("Preparing data...")
vix_raw, vvix_raw = download_market_data()
raw_data = pd.merge(
    vix_raw,
    vvix_raw,
    left_index=True,
    right_index=True,
    suffixes=('_VIX', '_VVIX'),
)
cleaned_data = clean_data(raw_data)
featured_data = create_technical_features(cleaned_data)
optimized_data, pca_model, scaler, selected_features = optimize_features(featured_data)

# Window the data into sequences of n_steps timesteps
n_steps = 30
X, y = create_sequences(optimized_data, n_steps)

# Ordered (unshuffled) split: first 70% train, next 15% validation, rest test
train_size = int(0.7 * len(X))
val_size = int(0.15 * len(X))

X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = (
    X[train_size:train_size + val_size],
    y[train_size:train_size + val_size],
)
X_test, y_test = X[train_size + val_size:], y[train_size + val_size:]

print("Data prepared successfully")
print(f"Training samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")
print(f"Test samples: {len(X_test)}")
print(f"Features per timestep: {X_train.shape[2]}")

# Cross-validation folds are derived from the training portion only
cv_splits = time_series_split(X_train, y_train, n_splits=5)
print(f"Created {len(cv_splits)} time series CV splits")

## Block 3: Enhanced GRU Architecture Definitions

In [None]:
def build_enhanced_basic_gru(trial, input_shape):
    """Sample hyperparameters from ``trial`` and return a compiled stacked-GRU regressor.

    Args:
        trial: Object exposing ``suggest_int`` / ``suggest_float`` (an Optuna
            trial or a compatible stand-in).
        input_shape: Shape handed to the first GRU layer as ``input_shape``.

    Returns:
        A compiled ``Sequential`` model (AdamW optimizer, MSE loss, MAE metric)
        ending in a single-unit ``Dense`` output.
    """
    # Hyperparameters are requested in a fixed order: recurrent widths,
    # regularization, dense width, then optimizer settings.
    units_first = trial.suggest_int('gru_units_1', 16, 256, step=16)
    units_second = trial.suggest_int('gru_units_2', 8, 128, step=8)
    drop = trial.suggest_float('dropout_rate', 0.05, 0.6, step=0.05)
    rec_drop = trial.suggest_float('recurrent_dropout', 0.0, 0.5, step=0.05)
    head_units = trial.suggest_int('dense_units', 4, 64, step=4)
    lr = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)

    net = Sequential()

    # First recurrent stage keeps the full sequence for the second GRU
    net.add(GRU(units_first, return_sequences=True, recurrent_dropout=rec_drop, input_shape=input_shape))
    net.add(BatchNormalization())
    net.add(Dropout(drop))

    # Second recurrent stage emits only its final state
    net.add(GRU(units_second, recurrent_dropout=rec_drop))
    net.add(BatchNormalization())
    net.add(Dropout(drop))

    # Regression head
    net.add(Dense(head_units, activation='relu'))
    net.add(Dropout(drop))
    net.add(Dense(1))

    net.compile(
        optimizer=AdamW(learning_rate=lr, weight_decay=decay),
        loss='mse',
        metrics=['mae'],
    )
    return net

def build_enhanced_bidirectional_gru(trial, input_shape):
    """Sample hyperparameters from ``trial`` and return a compiled bidirectional-GRU regressor.

    Args:
        trial: Object exposing ``suggest_int`` / ``suggest_float`` (an Optuna
            trial or a compatible stand-in).
        input_shape: Shape handed to the first ``Bidirectional`` wrapper as
            ``input_shape``.

    Returns:
        A compiled ``Sequential`` model (AdamW optimizer, MSE loss, MAE metric)
        ending in a single-unit ``Dense`` output.
    """
    # Hyperparameters are requested in a fixed order: recurrent widths,
    # regularization, dense width, then optimizer settings.
    units_first = trial.suggest_int('gru_units_1', 16, 192, step=16)
    units_second = trial.suggest_int('gru_units_2', 8, 96, step=8)
    drop = trial.suggest_float('dropout_rate', 0.05, 0.5, step=0.05)
    rec_drop = trial.suggest_float('recurrent_dropout', 0.0, 0.4, step=0.05)
    head_units = trial.suggest_int('dense_units', 8, 96, step=4)
    lr = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)

    net = Sequential()

    # First bidirectional stage keeps the full sequence for the next stage
    net.add(Bidirectional(GRU(units_first, return_sequences=True, recurrent_dropout=rec_drop), input_shape=input_shape))
    net.add(BatchNormalization())
    net.add(Dropout(drop))

    # Second bidirectional stage emits only its final states
    net.add(Bidirectional(GRU(units_second, recurrent_dropout=rec_drop)))
    net.add(BatchNormalization())
    net.add(Dropout(drop))

    # Regression head
    net.add(Dense(head_units, activation='relu'))
    net.add(Dropout(drop))
    net.add(Dense(1))

    net.compile(
        optimizer=AdamW(learning_rate=lr, weight_decay=decay),
        loss='mse',
        metrics=['mae'],
    )
    return net

def build_enhanced_attention_gru(trial, input_shape):
    """Sample hyperparameters from ``trial`` and return a compiled GRU + self-attention regressor.

    The network is two sequence-returning GRU stages, multi-head attention
    applied to the second stage's output (query and value are the same
    tensor), a residual sum followed by layer normalization, average pooling
    over the sequence axis, and a two-layer dense head.

    Args:
        trial: Object exposing ``suggest_int`` / ``suggest_float`` (an Optuna
            trial or a compatible stand-in).
        input_shape: Shape passed to the Keras ``Input`` layer.

    Returns:
        A compiled functional ``Model`` (AdamW optimizer, MSE loss, MAE metric)
        with a single-unit ``Dense`` output.
    """
    # Hyperparameters are requested in a fixed order: recurrent widths,
    # attention geometry, regularization, dense width, then optimizer settings.
    units_first = trial.suggest_int('gru_units_1', 16, 192, step=16)
    units_second = trial.suggest_int('gru_units_2', 8, 96, step=8)
    heads = trial.suggest_int('num_heads', 1, 12)
    head_dim = trial.suggest_int('key_dim', 4, 64, step=4)
    drop = trial.suggest_float('dropout_rate', 0.05, 0.5, step=0.05)
    attn_drop = trial.suggest_float('attention_dropout', 0.0, 0.3, step=0.05)
    head_units = trial.suggest_int('dense_units', 8, 96, step=4)
    lr = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)

    seq_in = Input(shape=input_shape)

    # Recurrent encoder: both stages return the whole sequence so that
    # attention can look across timesteps.
    hidden = GRU(units_first, return_sequences=True)(seq_in)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(drop)(hidden)

    hidden = GRU(units_second, return_sequences=True)(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(drop)(hidden)

    # Self-attention with a residual connection, then layer normalization
    attended = MultiHeadAttention(
        num_heads=heads,
        key_dim=head_dim,
        dropout=attn_drop,
    )(hidden, hidden)
    attended = LayerNormalization()(attended + hidden)

    # Collapse the sequence axis
    summary = GlobalAveragePooling1D()(attended)

    # Regression head; the second dense layer is half as wide as the first
    head = Dense(head_units, activation='relu')(summary)
    head = Dropout(drop)(head)
    head = Dense(head_units // 2, activation='relu')(head)
    prediction = Dense(1)(head)

    net = Model(inputs=seq_in, outputs=prediction)
    net.compile(
        optimizer=AdamW(learning_rate=lr, weight_decay=decay),
        loss='mse',
        metrics=['mae'],
    )
    return net

# Lookup table from architecture name to the builder that constructs it.
# Each builder is called as builder(trial, input_shape).
ENHANCED_GRU_ARCHITECTURES = dict(
    Enhanced_Basic_GRU=build_enhanced_basic_gru,
    Enhanced_Bidirectional_GRU=build_enhanced_bidirectional_gru,
    Enhanced_Attention_GRU=build_enhanced_attention_gru,
)

print(f"Enhanced GRU architectures defined: {[*ENHANCED_GRU_ARCHITECTURES]}")

## Block 4: Enhanced Optimization Framework

In [None]:
def enhanced_gru_cv_objective(trial, architecture_name, input_shape, X, y, cv_splits):
    """Optuna objective: mean best validation loss across time-series CV folds.

    For every ``(train_idx, val_idx)`` pair in ``cv_splits`` a fresh model is
    built from the trial's hyperparameters, trained with early stopping and
    learning-rate reduction, and scored by its lowest finite ``val_loss``.

    Args:
        trial: Optuna trial used to sample hyperparameters and report
            intermediate values.
        architecture_name: Key into ``ENHANCED_GRU_ARCHITECTURES``.
        input_shape: Shape forwarded to the model builder.
        X: Feature array, indexed by the fold index arrays.
        y: Target array, indexed by the fold index arrays.
        cv_splits: Iterable of ``(train_idx, val_idx)`` pairs.

    Returns:
        Mean of the per-fold best validation losses when at least half of the
        folds trained successfully, otherwise ``float('inf')``.

    Raises:
        optuna.TrialPruned: Propagated when the pruner stops the trial.
    """
    model_builder = ENHANCED_GRU_ARCHITECTURES[architecture_name]

    # Training-loop hyperparameters (shared by every fold of this trial)
    batch_size = trial.suggest_categorical('batch_size', [4, 8, 16, 32, 64, 128])
    epochs = trial.suggest_int('epochs', 20, 150, step=10)
    patience = trial.suggest_int('patience', 5, 25, step=2)

    fold_scores = []
    n_folds = 0

    for fold_idx, (train_idx, val_idx) in enumerate(cv_splits):
        n_folds += 1
        try:
            X_train_fold = X[train_idx]
            y_train_fold = y[train_idx]
            X_val_fold = X[val_idx]
            y_val_fold = y[val_idx]

            # Build a fresh model for this fold
            model = model_builder(trial, input_shape)

            callbacks = [
                EarlyStopping(
                    monitor='val_loss',
                    patience=patience,
                    restore_best_weights=True,
                    verbose=0
                ),
                ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.8,
                    patience=patience // 3,
                    min_lr=1e-8,
                    verbose=0
                ),
            ]
            # The pruning callback reports val_loss with step == epoch. Optuna
            # ignores (with a warning) a report for a step that already has a
            # value, so re-attaching it on later folds only yields warnings and
            # pruning decisions based on stale fold-0 values. Attach it once.
            if fold_idx == 0:
                callbacks.append(TFKerasPruningCallback(trial, 'val_loss'))

            history = model.fit(
                X_train_fold, y_train_fold,
                validation_data=(X_val_fold, y_val_fold),
                epochs=epochs,
                batch_size=batch_size,
                callbacks=callbacks,
                verbose=0
            )

            # Score the fold by its best finite validation loss; a run whose
            # loss was never finite (diverged) counts as a failed fold.
            losses = np.asarray(history.history['val_loss'], dtype=float)
            finite_losses = losses[np.isfinite(losses)]
            if finite_losses.size == 0:
                raise ValueError("validation loss was never finite")
            fold_scores.append(float(finite_losses.min()))

        except optuna.TrialPruned:
            # TrialPruned derives from Exception; without this clause the
            # generic handler below would swallow it and pruning would never
            # actually stop the trial.
            raise
        except Exception as e:
            print(f"Fold {fold_idx} failed: {e}")
        finally:
            # Release graph/session state whether the fold succeeded or not
            tf.keras.backend.clear_session()

    # Require at least half of the folds to have succeeded. Comparing
    # 2 * successes against the fold count avoids the floor-division slack
    # (2 of 5 passing, or 0 of 1 passing and yielding a NaN mean).
    if fold_scores and 2 * len(fold_scores) >= n_folds:
        return float(np.mean(fold_scores))
    return float('inf')

def optimize_enhanced_gru_architecture(architecture_name, X, y, cv_splits, n_trials=200,
                                       timeout=7200, seed=42):
    """Run an Optuna study that tunes one GRU architecture with cross-validation.

    Args:
        architecture_name: Key into ``ENHANCED_GRU_ARCHITECTURES``; also used
            (lower-cased) in the study name.
        X: Feature array; ``X.shape[1]`` and ``X.shape[2]`` define the model
            input shape.
        y: Target array.
        cv_splits: Fold definitions forwarded to ``enhanced_gru_cv_objective``.
        n_trials: Maximum number of trials.
        timeout: Wall-clock limit for the study in seconds (default 2 hours);
            ``None`` disables the limit.
        seed: Seed for the TPE sampler so the sequence of suggested
            hyperparameters is repeatable; ``None`` leaves it unseeded.

    Returns:
        The ``optuna.Study`` after optimization finished or timed out.
    """
    print(f"\nOptimizing {architecture_name} with enhanced methodology...")

    study_name = f"enhanced_gru_{architecture_name.lower()}_optimization"
    study = optuna.create_study(
        direction='minimize',
        study_name=study_name,
        pruner=optuna.pruners.MedianPruner(
            n_startup_trials=10,
            n_warmup_steps=15,
            interval_steps=5
        ),
        # The sampler owns its RNG; seeding NumPy/TensorFlow globally does not
        # make its suggestions repeatable, so the seed is passed explicitly.
        sampler=optuna.samplers.TPESampler(
            n_startup_trials=20,
            n_ei_candidates=50,
            seed=seed
        )
    )

    input_shape = (X.shape[1], X.shape[2])

    study.optimize(
        lambda trial: enhanced_gru_cv_objective(trial, architecture_name, input_shape, X, y, cv_splits),
        n_trials=n_trials,
        timeout=timeout
    )

    return study

# Status message printed once the objective and study-runner functions are defined
print("Enhanced GRU optimization framework ready")

## Block 5: Execute Enhanced GRU Optimization

In [None]:
# Enhanced optimization configuration
gru_optimization_results = {}
n_trials_per_architecture = 200  # Increased for better search

print("Starting enhanced hyperparameter optimization for GRU architectures...")
print(f"Trials per architecture: {n_trials_per_architecture}")
print(f"Cross-validation folds: {len(cv_splits)}")
# NOTE(review): the "* 4 / 60" factor presumably assumes ~4 seconds per trial
# and does not scale with the number of CV folds — confirm before relying on it.
print(f"Total estimated time: {len(ENHANCED_GRU_ARCHITECTURES) * n_trials_per_architecture * 4 / 60:.1f} minutes")

for arch_name in ENHANCED_GRU_ARCHITECTURES:
    print(f"\nOptimizing {arch_name}...")

    try:
        study = optimize_enhanced_gru_architecture(
            arch_name,
            X_train,
            y_train,
            cv_splits,
            n_trials=n_trials_per_architecture
        )

        n_complete_trials = sum(t.state == TrialState.COMPLETE for t in study.trials)

        # best_params / best_value raise when no trial completed; that case is
        # reported by the handler below and the architecture is skipped.
        gru_optimization_results[arch_name] = {
            'study': study,
            'best_params': study.best_params,
            'best_value': study.best_value,
            'n_trials': len(study.trials),
            'n_complete_trials': n_complete_trials
        }
    except Exception as e:
        print(f"Optimization failed for {arch_name}: {e}")
        continue

    print(f"Best CV loss for {arch_name}: {study.best_value:.6f}")
    print(f"Completed trials: {n_complete_trials}/{len(study.trials)}")
    print(f"Best parameters: {study.best_params}")

    # Plotting is best-effort and kept in its own handler so that a
    # visualization problem is not reported as an optimization failure.
    try:
        fig = optuna.visualization.plot_optimization_history(study)
        fig.show()

        # Parameter importance is computed from completed trials, so gate on
        # those rather than on all trials (which include pruned/failed ones).
        if n_complete_trials > 10:
            fig = optuna.visualization.plot_param_importances(study)
            fig.show()
    except Exception as e:
        print(f"Visualization skipped for {arch_name}: {e}")

print(f"\nEnhanced GRU hyperparameter optimization completed for {len(gru_optimization_results)} architectures")

## Block 6: Statistical Analysis and Model Evaluation

In [None]:
def evaluate_optimized_gru_model(architecture_name, best_params, X_train, y_train, X_test, y_test,
                                 X_val=None, y_val=None, val_fraction=0.15):
    """Retrain the best configuration and evaluate it on the held-out test set.

    The test set is used only for the final predictions and metrics. Early
    stopping and learning-rate scheduling monitor a separate validation set so
    that the reported test metrics are not selected on the test data.

    Args:
        architecture_name: Key into ``ENHANCED_GRU_ARCHITECTURES``.
        best_params: Mapping of tuned hyperparameter names to values.
        X_train: Training features; ``shape[1]`` and ``shape[2]`` define the
            model input shape.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.
        X_val: Optional validation features monitored during training.
        y_val: Optional validation targets; must accompany ``X_val``.
        val_fraction: When no validation set is supplied, the trailing
            fraction of the training samples held out (in order) for
            monitoring instead.

    Returns:
        Dict with the architecture name, parameters, metrics, training time,
        parameter count, training history, predictions, confidence intervals,
        baseline predictions and significance tests against each baseline.

    Raises:
        ValueError: If only one of ``X_val`` / ``y_val`` is given, or if
            ``val_fraction`` leaves no training or no validation samples.
    """
    if (X_val is None) != (y_val is None):
        raise ValueError("X_val and y_val must be provided together")

    if X_val is None:
        # No explicit validation set: hold out the most recent training
        # samples, preserving temporal order.
        n_holdout = int(len(X_train) * val_fraction)
        if not 0 < n_holdout < len(X_train):
            raise ValueError(
                f"val_fraction={val_fraction} leaves no data for training or validation"
            )
        X_fit, y_fit = X_train[:-n_holdout], y_train[:-n_holdout]
        X_val, y_val = X_train[-n_holdout:], y_train[-n_holdout:]
    else:
        X_fit, y_fit = X_train, y_train

    input_shape = (X_train.shape[1], X_train.shape[2])

    # Stand-in for an Optuna trial that replays the tuned values, falling back
    # to the midpoint (or first choice) for any name missing from best_params.
    class MockTrial:
        def __init__(self, params):
            self.params = params

        def suggest_int(self, name, low, high, step=1, log=False):
            return self.params.get(name, (low + high) // 2)

        def suggest_float(self, name, low, high, log=False, step=None):
            return self.params.get(name, (low + high) / 2)

        def suggest_categorical(self, name, choices):
            return self.params.get(name, choices[0])

    mock_trial = MockTrial(best_params)

    # Build the best model
    model_builder = ENHANCED_GRU_ARCHITECTURES[architecture_name]
    model = model_builder(mock_trial, input_shape)

    callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=0),
        ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=8, min_lr=1e-8, verbose=0)
    ]

    # Train; val_loss is computed on the validation set, never on the test set
    start_time = time.time()
    history = model.fit(
        X_fit, y_fit,
        validation_data=(X_val, y_val),
        epochs=best_params.get('epochs', 100),
        batch_size=best_params.get('batch_size', 32),
        callbacks=callbacks,
        verbose=1
    )
    training_time = time.time() - start_time

    # Make predictions
    y_pred = model.predict(X_test, verbose=0).flatten()

    # Calculate comprehensive metrics
    metrics = calculate_metrics(y_test, y_pred)

    # Calculate confidence intervals
    lower_bound, upper_bound = calculate_confidence_intervals(y_pred)

    # Create baseline models for comparison
    baselines = create_baseline_models(y_train, y_test)

    # Statistical significance tests against baselines. Targets and baseline
    # predictions are flattened to 1-D so that a (n, 1) array cannot silently
    # broadcast against the (n,) predictions into an (n, n) error matrix.
    significance_tests = {}
    y_true = np.asarray(y_test).reshape(-1)
    model_errors = (y_true - y_pred) ** 2

    for baseline_name, baseline_pred in baselines.items():
        baseline_errors = (y_true - np.asarray(baseline_pred).reshape(-1)) ** 2
        dm_stat, p_value = diebold_mariano_test(model_errors, baseline_errors)
        # NOTE(review): 'significant' only says the errors differ at the 5%
        # level; which side is better depends on the sign convention of
        # diebold_mariano_test — confirm against that helper.
        significance_tests[baseline_name] = {
            'dm_statistic': dm_stat,
            'p_value': p_value,
            'significant': p_value < 0.05
        }

    results = {
        'architecture': architecture_name,
        'best_params': best_params,
        'metrics': metrics,
        'training_time': training_time,
        'total_params': model.count_params(),
        'history': history.history,
        'predictions': y_pred,
        'confidence_intervals': {
            'lower': lower_bound,
            'upper': upper_bound
        },
        'baseline_comparisons': baselines,
        'significance_tests': significance_tests
    }

    tf.keras.backend.clear_session()
    return results

# Evaluate all optimized GRU models
gru_evaluation_results = {}

print("\n=== EVALUATING OPTIMIZED GRU MODELS ===")
for arch_name, opt_result in gru_optimization_results.items():
    print(f"\nEvaluating {arch_name}...")

    try:
        # Monitor training on the validation split so the test split stays
        # untouched until the final predictions.
        eval_result = evaluate_optimized_gru_model(
            arch_name,
            opt_result['best_params'],
            X_train,
            y_train,
            X_test,
            y_test,
            X_val=X_val,
            y_val=y_val
        )
        gru_evaluation_results[arch_name] = eval_result

        # Print results
        metrics = eval_result['metrics']
        print(f"  MSE: {metrics['MSE']:.6f}")
        print(f"  MAE: {metrics['MAE']:.6f}")
        print(f"  RMSE: {metrics['RMSE']:.6f}")
        print(f"  R²: {metrics['R2']:.6f}")
        print(f"  Directional Accuracy: {metrics['Directional_Accuracy']:.4f}")
        print(f"  Training Time: {eval_result['training_time']:.2f} seconds")
        print(f"  Model Parameters: {eval_result['total_params']:,}")

        # Print significance tests
        print(f"  Statistical Significance vs Baselines:")
        for baseline, test in eval_result['significance_tests'].items():
            significance = "✓" if test['significant'] else "✗"
            print(f"    vs {baseline}: {significance} (p={test['p_value']:.4f})")

    except Exception as e:
        # Best-effort: report the failure and move on to the next architecture
        print(f"Evaluation failed for {arch_name}: {e}")

print("\n=== GRU OPTIMIZATION AND EVALUATION COMPLETED ===")