# In [1]:
"""
EnergyNexus Advanced LSTM Architectures - OPTIMIZED FOR 85%+ ACCURACY
Aditya's MSc Project - Enhanced LSTM Models for Multi-variate Energy Forecasting

Key Optimizations:
1. Extended training epochs (100+ vs 20)
2. Advanced feature engineering
3. Deeper LSTM architectures
4. Better learning rate scheduling
5. Enhanced data preprocessing
6. Cyclical learning rates
7. Advanced ensemble methods
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from datetime import datetime, timedelta
import sys
import os
import json

# Suppress warnings for clean output.
# NOTE: this filter is global and hides every warning category, including
# deprecation warnings raised by the libraries imported below.
warnings.filterwarnings('ignore')

# Configure plotting defaults (style sheet, colour palette, figure size, font).
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("Set2")
plt.rcParams['figure.figsize'] = (14, 8)
plt.rcParams['font.size'] = 11

# Add source directory.
# The path is relative ('../../src'), so it is resolved against the current
# working directory at import time, not against this file's location.
sys.path.append(os.path.join('..', '..', 'src'))

# Start-up banner with a wall-clock timestamp.
print("EnergyNexus Advanced LSTM Architectures - OPTIMIZED FOR 85%+ ACCURACY")
print("=" * 70)
print(f"Development started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Import the deep-learning / scikit-learn stack with error handling.
# On success TENSORFLOW_AVAILABLE is True and the TensorFlow seed is fixed to 42;
# on ImportError the flag is False and the names imported inside the try block
# (tf, keras, layers, LSTM, ...) are left undefined. The model builders in this
# file check the flag before touching those names.
try:
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers, Model, Input, callbacks, optimizers
    from tensorflow.keras.layers import (LSTM, Dense, Dropout, BatchNormalization, 
                                        Attention, MultiHeadAttention, LayerNormalization,
                                        Bidirectional, TimeDistributed, RepeatVector, 
                                        GRU, Conv1D, GlobalMaxPooling1D, Concatenate)
    from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
    from sklearn.model_selection import TimeSeriesSplit
    
    print(f"TensorFlow version: {tf.__version__}")
    tf.random.set_seed(42)
    TENSORFLOW_AVAILABLE = True
    
except ImportError as e:
    # A missing scikit-learn also lands here, because its imports share the
    # same try block as TensorFlow's.
    print(f"Advanced libraries not available: {e}")
    TENSORFLOW_AVAILABLE = False

# =============================================================================
# ENHANCED DATA PREPARATION WITH ADVANCED FEATURE ENGINEERING
# =============================================================================

def load_comprehensive_energy_data():
    """Load the processed energy dataset, or synthesise one if it is missing.

    Tries to read ``../../data/processed/test_cleaned_energy_data.csv`` with
    its first column parsed as dates and used as the index. If that file does
    not exist, a seeded (``np.random.seed(42)``) synthetic hourly dataset of
    24 * 180 rows starting at 2024-01-01 is generated instead. Either way the
    frame is passed through ``add_advanced_features`` before being returned.

    Returns:
        tuple: ``(energy_data, preparation_summary)``. ``energy_data`` is the
        feature-engineered DataFrame; ``preparation_summary`` is a dict with
        the record count, the index date range, the column count, a
        hard-coded list of target-variable names, missing-value statistics
        and a ``data_source`` label.

    Note:
        Only ``FileNotFoundError`` is handled; any other error raised by
        ``pd.read_csv`` propagates to the caller.
    """
    try:
        energy_data = pd.read_csv('../../data/processed/test_cleaned_energy_data.csv', 
                                 parse_dates=[0], index_col=0)
        print("Successfully loaded processed energy dataset")
        data_source = "Processed pipeline data"
        
    except FileNotFoundError:
        print("Creating enhanced energy dataset for advanced modeling...")
        
        # Generate realistic multi-variate energy data.
        # The global NumPy seed is fixed, so the synthetic data is reproducible
        # only as long as the order of the np.random calls below is unchanged.
        np.random.seed(42)
        hours = 24 * 180  # 180 days for better patterns
        dates = pd.date_range(start='2024-01-01', periods=hours, freq='H')
        time_hours = np.arange(hours)
        
        # Enhanced weather patterns with more realism:
        # constant offset + annual sinusoid + daily sinusoid + 7-day sinusoid
        # (driven by day-of-year) + Gaussian noise.
        base_temperature = (15 + 
                           12 * np.sin(2 * np.pi * dates.dayofyear / 365) +
                           8 * np.sin((time_hours % 24 - 14) * 2 * np.pi / 24) +
                           3 * np.sin(2 * np.pi * dates.dayofyear / 7) +  # Weekly variation
                           np.random.normal(0, 2, hours))
        
        wind_speed = (8 + 
                     4 * np.sin(2 * np.pi * dates.dayofyear / 365 + np.pi/3) +
                     2 * np.sin(2 * np.pi * time_hours / (24*7)) +  # Weekly pattern
                     np.random.normal(0, 2, hours))
        wind_speed = np.maximum(0, wind_speed)  # clamp negative speeds to zero
        
        # Cloud cover drawn independently per hour from Beta(2, 5), scaled to 0-100.
        cloud_cover = np.random.beta(2, 5, hours) * 100
        
        # Enhanced solar generation with better modeling.
        # solar_elevation is the positive half of a sine over the hour of day
        # (zero outside hours 12-24 of the shifted cycle as written).
        solar_elevation = np.maximum(0, np.sin((time_hours % 24 - 12) * np.pi / 12))
        seasonal_solar = 1 + 0.3 * np.sin(2 * np.pi * dates.dayofyear / 365)
        cloud_attenuation = 1 - (cloud_cover / 100) * 0.8
        # Output is derated by 0.4% per degree above 25.
        temperature_efficiency = 1 - np.maximum(0, base_temperature - 25) * 0.004
        
        solar_generation = (solar_elevation * seasonal_solar * cloud_attenuation * 
                           temperature_efficiency * 250 + np.random.normal(0, 8, hours))
        solar_generation = np.maximum(0, solar_generation)
        
        # Enhanced wind generation with realistic power curve.
        # This stays an explicit loop: the rated-region branch draws one random
        # number per matching element, so vectorising it would change the
        # random stream and therefore every value generated after it.
        wind_generation = np.zeros(hours)
        for i, ws in enumerate(wind_speed):
            if ws < 3:  # Cut-in speed
                wind_generation[i] = 0
            elif ws < 12:  # Cubic region
                wind_generation[i] = 150 * ((ws - 3) / 9) ** 3
            elif ws < 25:  # Rated region
                wind_generation[i] = 150 + np.random.normal(0, 10)
            else:  # Cut-out speed
                wind_generation[i] = 0
        
        wind_generation = np.maximum(0, wind_generation)
        
        # Complex energy demand modeling with more patterns:
        # base load + two clipped daily sinusoids + weekly sinusoid.
        demand_base = 550
        daily_residential = 120 * np.maximum(0, np.sin((time_hours % 24 - 7) * np.pi / 11))
        daily_commercial = 180 * np.maximum(0, np.sin((time_hours % 24 - 5) * np.pi / 14))
        weekly_pattern = 80 * np.sin((time_hours % (24*7)) * 2 * np.pi / (24*7))
        
        # Temperature-driven demand: linear below 18 (heating) and above 22 (cooling).
        heating_demand = np.maximum(0, (18 - base_temperature) * 15)
        cooling_demand = np.maximum(0, (base_temperature - 22) * 20)
        
        # 1 during hours 8-18 inclusive on Monday-Friday, else 0.
        business_hours = ((dates.hour >= 8) & (dates.hour <= 18) & 
                         (dates.dayofweek < 5)).astype(int)
        industrial_demand = business_hours * 100 + np.random.normal(0, 20, hours)
        
        renewable_total = solar_generation + wind_generation
        # 12% of renewable output is subtracted from demand.
        grid_tied_reduction = renewable_total * 0.12
        
        # Add demand persistence for better LSTM learning.
        # Each step adds 0.4x the already-updated previous value, which makes
        # the noise a first-order autoregressive series.
        demand_noise = np.random.normal(0, 30, hours)
        for i in range(1, hours):
            demand_noise[i] += 0.4 * demand_noise[i-1]  # Autocorrelation
        
        total_demand = (demand_base + daily_residential + daily_commercial + 
                       weekly_pattern + heating_demand + cooling_demand + 
                       industrial_demand - grid_tied_reduction + demand_noise)
        total_demand = np.maximum(350, total_demand)  # floor demand at 350
        
        # Natural gas generation covers 80% of the shortfall (plus noise).
        # NOTE(review): the constant 250 recurs below as a fixed supply term;
        # presumably a constant baseload source - confirm with the author.
        supply_shortfall = np.maximum(0, total_demand - renewable_total - 250)
        natural_gas_generation = supply_shortfall * 0.8 + np.random.normal(0, 25, hours)
        natural_gas_generation = np.maximum(0, natural_gas_generation)
        
        # Grid frequency: 50.0 plus a term proportional to the supply/demand
        # imbalance plus noise, clipped to [49.7, 50.3].
        total_supply = renewable_total + natural_gas_generation + 250
        frequency_deviation = (total_supply - total_demand) * 0.0008
        grid_frequency = 50.0 + frequency_deviation + np.random.normal(0, 0.015, hours)
        grid_frequency = np.clip(grid_frequency, 49.7, 50.3)
        
        # Energy price with more complexity: base 45 plus weighted z-scores of
        # demand (+), renewables (-) and gas generation (+), plus noise.
        demand_factor = (total_demand - total_demand.mean()) / total_demand.std() * 12
        renewable_factor = -(renewable_total - renewable_total.mean()) / renewable_total.std() * 8
        gas_price_factor = (natural_gas_generation - natural_gas_generation.mean()) / natural_gas_generation.std() * 6
        volatility = np.random.normal(0, 4, hours)
        
        energy_price = 45 + demand_factor + renewable_factor + gas_price_factor + volatility
        energy_price = np.maximum(15, energy_price)  # floor price at 15
        
        # Create comprehensive dataset with ENHANCED FEATURES
        energy_data = pd.DataFrame({
            # Primary targets
            'energy_demand': total_demand,
            'solar_generation': solar_generation,
            'wind_generation': wind_generation,
            'natural_gas_generation': natural_gas_generation,
            
            # Derived variables
            'total_renewable': renewable_total,
            'total_generation': renewable_total + natural_gas_generation + 250,
            'renewable_penetration': renewable_total / total_demand * 100,
            'supply_demand_balance': (renewable_total + natural_gas_generation + 250) - total_demand,
            
            # Weather and external factors
            'temperature': base_temperature,
            'wind_speed': wind_speed,
            'cloud_cover': cloud_cover,
            
            # System indicators
            'grid_frequency': grid_frequency,
            'energy_price': energy_price,
            
            # Temporal features
            'hour': dates.hour,
            'day_of_week': dates.dayofweek,
            'month': dates.month,
            'day_of_year': dates.dayofyear,
            'is_weekend': dates.dayofweek >= 5,
            'is_business_hour': business_hours,
            'is_peak_hour': dates.hour.isin([17, 18, 19, 20]),
            
            # Enhanced cyclical encodings (sin/cos pairs per period)
            'hour_sin': np.sin(2 * np.pi * dates.hour / 24),
            'hour_cos': np.cos(2 * np.pi * dates.hour / 24),
            'day_sin': np.sin(2 * np.pi * dates.dayofweek / 7),
            'day_cos': np.cos(2 * np.pi * dates.dayofweek / 7),
            'month_sin': np.sin(2 * np.pi * dates.month / 12),
            'month_cos': np.cos(2 * np.pi * dates.month / 12),
            'year_sin': np.sin(2 * np.pi * dates.dayofyear / 365),
            'year_cos': np.cos(2 * np.pi * dates.dayofyear / 365)
        }, index=dates)
        
        data_source = "Generated enhanced multi-variate sample data"
    
    # ADVANCED FEATURE ENGINEERING
    # Applied to both the loaded and the generated frame. The helper also drops
    # rows containing NaN, so the returned frame is shorter than the raw one.
    energy_data = add_advanced_features(energy_data)
    
    # Validation summary (computed after feature engineering)
    preparation_summary = {
        'total_records': len(energy_data),
        'date_range': {
            'start': energy_data.index.min(),
            'end': energy_data.index.max()
        },
        'variables': {
            'total_variables': len(energy_data.columns),
            # Hard-coded names; not checked against the columns actually present.
            'target_variables': ['energy_demand', 'solar_generation', 'wind_generation'],
        },
        'data_quality': {
            'missing_values': energy_data.isnull().sum().sum(),
            # Percentage of non-null cells over all rows x columns.
            'data_completeness': (1 - energy_data.isnull().sum().sum() / 
                                (len(energy_data) * len(energy_data.columns))) * 100
        },
        'data_source': data_source
    }
    
    print(f"Data source: {data_source}")
    print(f"Dataset shape: {energy_data.shape}")
    print(f"Date range: {energy_data.index.min()} to {energy_data.index.max()}")
    print(f"Data completeness: {preparation_summary['data_quality']['data_completeness']:.1f}%")
    
    return energy_data, preparation_summary

def add_advanced_features(data):
    """Return a copy of ``data`` with lag, rolling, interaction and calendar features.

    For each of ``energy_demand``, ``solar_generation`` and ``wind_generation``
    that is present, adds lags of 1/6/24/48/168 rows and rolling
    mean (6 and 24 rows), std, max and min (24 rows). Also adds, when their
    source columns exist, a temperature x hour interaction, a
    renewable-penetration / demand ratio and heating/cooling degree hours
    (thresholds 18 and 22). ``quarter``, ``week_of_year`` and the quarter
    sin/cos encoding are always added and require a ``DatetimeIndex``.

    Rows containing any NaN (in particular the first 168 rows when the weekly
    lag is created) are dropped from the result.

    Args:
        data: DataFrame indexed by a ``DatetimeIndex``. It is not modified.

    Returns:
        A new DataFrame with the extra columns and NaN rows removed. All
        added columns use plain NumPy dtypes so the frame converts to a
        numeric array.
    """
    print("Adding advanced features for enhanced LSTM performance...")

    # Work on a copy so the caller's frame does not silently gain columns.
    data = data.copy()

    series_cols = [col for col in ('energy_demand', 'solar_generation', 'wind_generation')
                   if col in data.columns]

    # Lag features for temporal dependencies (168 rows = one week of hourly data).
    for col in series_cols:
        for lag in (1, 6, 24, 48, 168):
            data[f'{col}_lag_{lag}h'] = data[col].shift(lag)

    # Rolling statistics
    for col in series_cols:
        data[f'{col}_rolling_mean_6h'] = data[col].rolling(window=6).mean()
        window_24 = data[col].rolling(window=24)
        data[f'{col}_rolling_mean_24h'] = window_24.mean()
        data[f'{col}_rolling_std_24h'] = window_24.std()
        data[f'{col}_rolling_max_24h'] = window_24.max()
        data[f'{col}_rolling_min_24h'] = window_24.min()

    # Interaction features
    if 'temperature' in data.columns and 'hour' in data.columns:
        data['temp_hour_interaction'] = data['temperature'] * data['hour']

    if 'renewable_penetration' in data.columns and 'energy_demand' in data.columns:
        # The small epsilon guards against division by zero.
        data['renewable_demand_ratio'] = data['renewable_penetration'] / (data['energy_demand'] + 1e-8)

    # Weather-based features
    if 'temperature' in data.columns:
        data['heating_degree_hours'] = np.maximum(0, 18 - data['temperature'])
        data['cooling_degree_hours'] = np.maximum(0, data['temperature'] - 22)

    # Time-based features
    data['quarter'] = data.index.quarter
    data['week_of_year'] = data.index.isocalendar().week

    # Advanced cyclical features
    data['quarter_sin'] = np.sin(2 * np.pi * data['quarter'] / 4)
    data['quarter_cos'] = np.cos(2 * np.pi * data['quarter'] / 4)

    # Remove rows with NaN values from lag features
    data = data.dropna()

    # isocalendar() yields the nullable UInt32 extension dtype; left as-is it
    # turns ``data[cols].values`` into an object array that Keras cannot use.
    # The cast is done after dropna so any missing week has already been removed.
    data = data.astype({'week_of_year': 'int64'})

    print(f"Enhanced dataset shape after feature engineering: {data.shape}")

    return data

def create_multi_variate_sequences(data, target_cols, feature_cols, 
                                  sequence_length=168, forecast_horizons=[1, 6, 24]):  # Extended to weekly
    """Build sliding-window inputs and multi-target, multi-horizon labels.

    For every forecast origin ``i`` the input window is rows
    ``i - sequence_length .. i - 1`` of ``feature_cols`` and the label for
    horizon ``h`` is row ``i + h - 1`` of each target, i.e. ``h`` steps after
    the last row of the window.

    Args:
        data: DataFrame holding both feature and target columns.
        target_cols: Column names to forecast.
        feature_cols: Column names used as model inputs.
        sequence_length: Number of rows in each input window.
        forecast_horizons: Steps ahead to predict. The default list is never
            mutated.

    Returns:
        tuple: ``(X, y, timestamps)`` where ``X`` has shape
        ``(n_samples, sequence_length, len(feature_cols))``, ``y`` has shape
        ``(n_samples, len(target_cols) * len(forecast_horizons))`` laid out
        target-major (all horizons of the first target, then the second, ...),
        and ``timestamps`` holds the index label of each forecast origin.
        When ``data`` is too short for a single sample, ``X`` and ``y`` are
        empty but keep their rank and trailing dimensions.

    Raises:
        ValueError: If ``forecast_horizons`` is empty (from ``max``).
    """
    print(f"Creating multi-variate sequences for advanced LSTM modeling")
    print(f"Target variables: {target_cols}")
    print(f"Feature variables: {len(feature_cols)} features")
    print(f"Sequence length: {sequence_length} hours (EXTENDED FOR WEEKLY PATTERNS)")
    print(f"Forecast horizons: {forecast_horizons}")

    # Prepare data arrays
    feature_data = data[feature_cols].values
    target_data = data[target_cols].values
    timestamps = data.index

    horizons = list(forecast_horizons)
    max_horizon = max(horizons)
    # Row offset of each horizon relative to the forecast origin.
    horizon_offsets = np.asarray(horizons) - 1

    # The furthest label sits at i + max_horizon - 1, which must not exceed
    # len(data) - 1, so the last usable origin is len(data) - max_horizon
    # (inclusive). The previous bound stopped one origin early.
    origins = range(sequence_length, len(data) - max_horizon + 1)

    if len(origins) > 0:
        X_sequences = np.array([feature_data[i - sequence_length:i] for i in origins])
        # target_data[i + offsets] is (horizons, targets); transposing before
        # flattening gives the target-major layout.
        y_multi_target_horizon = np.array(
            [target_data[i + horizon_offsets].T.ravel() for i in origins]
        )
        sequence_timestamps = np.array([timestamps[i] for i in origins])
    else:
        # Keep the expected rank so downstream shape checks and slicing work.
        X_sequences = np.empty((0, sequence_length, len(feature_cols)))
        y_multi_target_horizon = np.empty((0, len(target_cols) * len(horizons)))
        sequence_timestamps = np.array([])

    print(f"Created multi-variate sequences:")
    print(f"  X_shape: {X_sequences.shape}")
    print(f"  y_shape: {y_multi_target_horizon.shape}")
    print(f"  Output structure: {len(target_cols)} targets × {len(forecast_horizons)} horizons")

    return X_sequences, y_multi_target_horizon, sequence_timestamps

# =============================================================================
# ENHANCED LSTM ARCHITECTURES FOR 85%+ ACCURACY
# =============================================================================

def build_deep_attention_lstm_model(input_shape, output_size, 
                                   lstm_units=[128, 64, 32], attention_units=64,
                                   dropout_rate=0.1, learning_rate=0.0005):
    """Build and compile a stacked-LSTM regressor with temporal attention.

    Architecture: one ``LSTM(return_sequences=True)`` + ``BatchNormalization``
    per entry of ``lstm_units``; an additive attention scorer
    (``Dense(tanh)`` -> ``Dense(1)``) whose scores are softmax-normalised
    across time steps; the attention-weighted sequence is average-pooled over
    time and fed through three dense blocks (128/64/32) to a linear output.

    Args:
        input_shape: Shape of one input sample, passed to ``Input(shape=...)``.
        output_size: Number of units in the linear output layer.
        lstm_units: Units of each stacked LSTM layer (not mutated).
        attention_units: Width of the tanh layer that scores each time step.
        dropout_rate: Dropout used in the LSTM layers (input and recurrent)
            and after each dense layer.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The compiled Keras model (loss ``mse``, metrics ``mae``/``mape``), or
        ``None`` if TensorFlow is unavailable or construction fails (the
        error is printed).
    """
    print(f"Building DEEP attention-based LSTM model for 85%+ accuracy")
    print(f"Input shape: {input_shape}")
    print(f"Output size: {output_size}")
    print(f"Architecture: DEEPER with {len(lstm_units)} LSTM layers")
    
    if not TENSORFLOW_AVAILABLE:
        return None
    
    try:
        # Define input layer
        inputs = Input(shape=input_shape, name='energy_sequence_input')
        
        # Multi-layer LSTM with return sequences
        x = inputs
        
        for i, units in enumerate(lstm_units):
            x = LSTM(
                units=units,
                return_sequences=True,  # Always true for attention
                dropout=dropout_rate,
                recurrent_dropout=dropout_rate,
                name=f'lstm_attention_layer_{i+1}'
            )(x)
            
            x = BatchNormalization(name=f'batch_norm_lstm_{i+1}')(x)
            print(f"  DEEP LSTM Layer {i+1}: {units} units")
        
        # Attention scoring: one scalar score per time step.
        attention_hidden = Dense(attention_units, activation='tanh', name='attention_tanh')(x)
        attention_scores = Dense(1, name='attention_weights')(attention_hidden)

        # Normalise the scores ACROSS TIME (axis 1). A softmax activation on
        # the Dense(1) layer itself would run over the size-1 feature axis and
        # always return 1.0, which turns the attention into a no-op.
        attention_weights = layers.Softmax(axis=1, name='attention_softmax')(attention_scores)
        
        # Apply attention (weights broadcast over the feature axis)
        attended_features = layers.Multiply(name='attention_multiply')([x, attention_weights])
        
        # Global pooling over time. Because the weights sum to 1, this is the
        # attention-weighted sum scaled by 1/timesteps; the following Dense
        # layer absorbs that constant factor.
        global_features = layers.GlobalAveragePooling1D(name='global_attention_pool')(attended_features)
        
        # Deeper dense layers
        dense_1 = Dense(128, activation='relu', name='dense_attention_1')(global_features)
        dense_1 = Dropout(dropout_rate, name='dropout_dense_1')(dense_1)
        dense_1 = BatchNormalization(name='batch_norm_dense_1')(dense_1)
        
        dense_2 = Dense(64, activation='relu', name='dense_attention_2')(dense_1)
        dense_2 = Dropout(dropout_rate, name='dropout_dense_2')(dense_2)
        dense_2 = BatchNormalization(name='batch_norm_dense_2')(dense_2)
        
        dense_3 = Dense(32, activation='relu', name='dense_attention_3')(dense_2)
        dense_3 = Dropout(dropout_rate, name='dropout_dense_3')(dense_3)
        
        # Output layer
        outputs = Dense(output_size, activation='linear', name='attention_forecast_output')(dense_3)
        
        # Create model
        model = Model(inputs=inputs, outputs=outputs, name='DeepAttentionLSTM_EnergyForecaster')
        
        # Use lower learning rate for stability
        optimizer = optimizers.Adam(learning_rate=learning_rate)
        model.compile(
            optimizer=optimizer,
            loss='mse',
            metrics=['mae', 'mape']
        )
        
        print(f"  DEEP Attention-based LSTM model compiled successfully")
        print(f"  Total parameters: {model.count_params():,}")
        return model
        
    except Exception as e:
        # Best-effort builder: report the failure and let the caller skip this model.
        print(f"Error building deep attention LSTM: {e}")
        return None

def build_hybrid_cnn_lstm_model(input_shape, output_size,
                               conv_filters=[64, 32], lstm_units=[128, 64],
                               dropout_rate=0.1, learning_rate=0.0005):
    """Build and compile a Conv1D front end followed by a stacked LSTM.

    Each entry of ``conv_filters`` adds Conv1D(kernel 3, relu) ->
    BatchNormalization -> Dropout. Each entry of ``lstm_units`` adds an LSTM;
    all but the last return sequences and are followed by BatchNormalization.
    Two dense blocks (128, 64) lead to a linear output of ``output_size``.

    Returns:
        The compiled model (Adam, ``mse`` loss, ``mae``/``mape`` metrics), or
        ``None`` when TensorFlow is unavailable or building raises.
    """
    print(f"Building HYBRID CNN-LSTM model for enhanced feature extraction")

    if not TENSORFLOW_AVAILABLE:
        return None

    try:
        inputs = Input(shape=input_shape, name='hybrid_input')
        x = inputs

        # Convolutional feature extractor: apply the three layers of each
        # stage in order.
        for i, filters in enumerate(conv_filters):
            conv_stage = (
                Conv1D(filters=filters, kernel_size=3, activation='relu',
                       name=f'conv1d_{i+1}'),
                BatchNormalization(name=f'conv_bn_{i+1}'),
                Dropout(dropout_rate, name=f'conv_dropout_{i+1}'),
            )
            for stage_layer in conv_stage:
                x = stage_layer(x)
            print(f"  CNN Layer {i+1}: {filters} filters")

        # Recurrent stack: only the final LSTM collapses the time axis.
        final_lstm_index = len(lstm_units) - 1
        for i, units in enumerate(lstm_units):
            return_sequences = i < final_lstm_index
            recurrent = LSTM(units=units, return_sequences=return_sequences,
                             dropout=dropout_rate, recurrent_dropout=dropout_rate,
                             name=f'lstm_layer_{i+1}')
            x = recurrent(x)
            if return_sequences:
                x = BatchNormalization(name=f'lstm_bn_{i+1}')(x)
            print(f"  LSTM Layer {i+1}: {units} units")

        # Dense head, applied sequentially.
        dense_head = (
            Dense(128, activation='relu', name='dense_1'),
            Dropout(dropout_rate, name='dense_dropout_1'),
            BatchNormalization(name='dense_bn_1'),
            Dense(64, activation='relu', name='dense_2'),
            Dropout(dropout_rate, name='dense_dropout_2'),
            Dense(output_size, activation='linear', name='hybrid_output'),
        )
        outputs = x
        for head_layer in dense_head:
            outputs = head_layer(outputs)

        model = Model(inputs=inputs, outputs=outputs, name='HybridCNN_LSTM')
        model.compile(
            optimizer=optimizers.Adam(learning_rate=learning_rate),
            loss='mse',
            metrics=['mae', 'mape'],
        )

        print(f"  Hybrid CNN-LSTM model compiled successfully")
        print(f"  Total parameters: {model.count_params():,}")
        return model

    except Exception as e:
        print(f"Error building hybrid CNN-LSTM: {e}")
        return None

def build_transformer_lstm_model(input_shape, output_size,
                                lstm_units=[128, 64], num_heads=8, ff_dim=128,
                                dropout_rate=0.1, learning_rate=0.0005):
    """Build and compile an LSTM encoder topped with one Transformer-style block.

    Every LSTM returns sequences and is followed by BatchNormalization. The
    encoded sequence then goes through multi-head self-attention
    (``key_dim=64``) with a residual Add + LayerNormalization, and a
    position-wise feed-forward network (``ff_dim`` -> ``lstm_units[-1]``) with
    a second residual Add + LayerNormalization. The result is average-pooled
    over time and passed through dense blocks (128, 64) to a linear output.

    Returns:
        The compiled model (Adam, ``mse`` loss, ``mae``/``mape`` metrics), or
        ``None`` when TensorFlow is unavailable or building raises.
    """
    print(f"Building TRANSFORMER-enhanced LSTM model")

    if not TENSORFLOW_AVAILABLE:
        return None

    try:
        inputs = Input(shape=input_shape, name='transformer_input')

        # LSTM encoder
        sequence = inputs
        for i, units in enumerate(lstm_units):
            recurrent = LSTM(units=units, return_sequences=True,
                             dropout=dropout_rate, recurrent_dropout=dropout_rate,
                             name=f'lstm_layer_{i+1}')
            sequence = recurrent(sequence)
            sequence = BatchNormalization(name=f'lstm_bn_{i+1}')(sequence)
            print(f"  LSTM Layer {i+1}: {units} units")

        # Self-attention sub-block with residual connection and normalisation
        self_attention = MultiHeadAttention(
            num_heads=num_heads, key_dim=64, name='multi_head_attention'
        )
        attended = self_attention(sequence, sequence)
        sequence = layers.Add(name='add_attention')([sequence, attended])
        sequence = LayerNormalization(name='norm_attention')(sequence)

        # Feed-forward sub-block; projects back to the last LSTM width so the
        # residual Add has matching shapes.
        feed_forward = Dense(ff_dim, activation='relu', name='ff_1')(sequence)
        feed_forward = Dropout(dropout_rate, name='ff_dropout')(feed_forward)
        feed_forward = Dense(lstm_units[-1], name='ff_2')(feed_forward)
        sequence = layers.Add(name='add_ff')([sequence, feed_forward])
        sequence = LayerNormalization(name='norm_ff')(sequence)

        # Collapse the time axis, then the dense head
        pooled = layers.GlobalAveragePooling1D(name='global_pool')(sequence)
        dense_head = (
            Dense(128, activation='relu', name='dense_1'),
            Dropout(dropout_rate, name='dense_dropout_1'),
            BatchNormalization(name='dense_bn_1'),
            Dense(64, activation='relu', name='dense_2'),
            Dropout(dropout_rate, name='dense_dropout_2'),
            Dense(output_size, activation='linear', name='transformer_output'),
        )
        outputs = pooled
        for head_layer in dense_head:
            outputs = head_layer(outputs)

        model = Model(inputs=inputs, outputs=outputs, name='TransformerLSTM')
        model.compile(
            optimizer=optimizers.Adam(learning_rate=learning_rate),
            loss='mse',
            metrics=['mae', 'mape'],
        )

        print(f"  Transformer-LSTM model compiled successfully")
        print(f"  Total parameters: {model.count_params():,}")
        return model

    except Exception as e:
        print(f"Error building Transformer-LSTM: {e}")
        return None

# =============================================================================
# ADVANCED TRAINING STRATEGIES FOR 85%+ ACCURACY
# =============================================================================

def _make_triangular_lr_schedule(cycle_length=20, rel_tol=1e-4):
    """Return a ``schedule(epoch, lr)`` that cycles between 1x and 2x a base LR.

    The base learning rate is captured on the first call. Within each cycle
    the multiplier rises linearly from 1 to 2 over the first half and falls
    back towards 1 over the second half. If the learning rate handed in
    differs from the value this schedule last returned (for example because
    ``ReduceLROnPlateau`` lowered it at the end of the previous epoch), the
    base is rescaled by the same ratio so that reduction is kept rather than
    overwritten.
    """
    half_cycle = max(1, cycle_length // 2)
    state = {'base_lr': None, 'last_lr': None}

    def cyclical_lr_schedule(epoch, lr):
        """Cyclical learning rate schedule."""
        lr = float(lr)
        last_lr = state['last_lr']
        if state['base_lr'] is None:
            state['base_lr'] = lr
        elif last_lr and abs(lr - last_lr) > rel_tol * abs(last_lr):
            # Something else changed the LR since our last update; follow it.
            state['base_lr'] *= lr / last_lr

        position = epoch % cycle_length
        multiplier = 2.0 - abs(position / half_cycle - 1.0)
        new_lr = state['base_lr'] * multiplier
        state['last_lr'] = new_lr
        return new_lr

    return cyclical_lr_schedule


def create_advanced_callbacks_for_accuracy(model_name, patience=50):
    """Create the Keras callback list used for long training runs.

    Creates ``../../models/advanced_lstm`` and
    ``../../results/logs/advanced_training`` if needed and returns, in order:
    EarlyStopping (restores best weights), ModelCheckpoint
    (``<model_name>_best.h5``, best ``val_loss`` only), ReduceLROnPlateau
    (factor 0.3, patience 15), a triangular cyclical LearningRateScheduler,
    and a CSVLogger appending to ``<model_name>_training_metrics.csv``.

    The cyclical schedule is computed from a fixed base learning rate. The
    previous version multiplied the *current* rate on every epoch, which
    compounds (roughly x33 after ten epochs, then a collapse) instead of
    cycling.

    Args:
        model_name: Used in the checkpoint and CSV log file names.
        patience: Early-stopping patience in epochs.

    Returns:
        list: The configured Keras callbacks.
    """
    print(f"Setting up ADVANCED callbacks for {model_name} (targeting 85%+ accuracy)...")
    
    model_dir = '../../models/advanced_lstm'
    log_dir = '../../results/logs/advanced_training'
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    
    callbacks_list = []
    
    # Extended early stopping for better convergence
    early_stopping = callbacks.EarlyStopping(
        monitor='val_loss',
        patience=patience,  # Increased patience
        restore_best_weights=True,
        verbose=1,
        mode='min',
        min_delta=1e-7  # Smaller min_delta for fine-tuning
    )
    callbacks_list.append(early_stopping)
    
    # Model checkpoint
    checkpoint_path = os.path.join(model_dir, f'{model_name}_best.h5')
    model_checkpoint = callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=False,
        verbose=1,
        mode='min'
    )
    callbacks_list.append(model_checkpoint)
    
    # Advanced learning rate scheduling
    lr_scheduler = callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.3,  # More aggressive reduction
        patience=15,  # More patience before reduction
        min_lr=1e-8,  # Lower minimum learning rate
        verbose=1,
        mode='min'
    )
    callbacks_list.append(lr_scheduler)
    
    # Cyclical learning rate for better convergence
    try:
        from tensorflow.keras.callbacks import LearningRateScheduler
        
        cyclical_lr = LearningRateScheduler(
            _make_triangular_lr_schedule(cycle_length=20), verbose=0
        )
        callbacks_list.append(cyclical_lr)
        
    except (ImportError, AttributeError):
        # Only a missing scheduler class is tolerated; other errors surface.
        print("  Cyclical LR not available, using standard scheduling")
    
    # CSV logger
    csv_logger = callbacks.CSVLogger(
        os.path.join(log_dir, f'{model_name}_training_metrics.csv'),
        append=True
    )
    callbacks_list.append(csv_logger)
    
    print(f"  ADVANCED callbacks configured: {len(callbacks_list)} callbacks")
    print(f"  Early stopping patience: {patience} epochs")
    print(f"  Advanced learning rate scheduling enabled")
    
    return callbacks_list

def train_advanced_model_for_accuracy(model, model_name, X_train, y_train, X_val, y_val,
                                    epochs=150, batch_size=16):  # Extended training
    """Fit ``model`` on the training split and report a short summary.

    Objects without a ``fit`` attribute are treated as mocks: a message is
    printed and ``None`` is returned. Otherwise the callbacks from
    ``create_advanced_callbacks_for_accuracy`` (patience 50) are attached and
    the model is fitted without shuffling, validating on ``(X_val, y_val)``.

    Returns:
        The Keras ``History`` object, or ``None`` for a mock model or when
        fitting/reporting raises (the error is printed).
    """
    print(f"Training ADVANCED {model_name} model for 85%+ accuracy...")
    print(f"  Training samples: {X_train.shape[0]}")
    print(f"  Validation samples: {X_val.shape[0]}")
    print(f"  EXTENDED TRAINING: {epochs} epochs (vs previous 20)")
    print(f"  SMALLER BATCH SIZE: {batch_size} (for better gradients)")

    # Guard: nothing to train if the object cannot be fitted.
    if not hasattr(model, 'fit'):
        print(f"  Mock training completed for {model_name}")
        return None

    advanced_callbacks = create_advanced_callbacks_for_accuracy(model_name, patience=50)
    fit_options = dict(
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=advanced_callbacks,
        verbose=1,
        shuffle=False,  # keep samples in temporal order
    )

    training_start = datetime.now()

    try:
        history = model.fit(X_train, y_train, **fit_options)

        training_duration = datetime.now() - training_start

        print(f"  EXTENDED TRAINING completed: {training_duration}")
        print(f"  Final training loss: {history.history['loss'][-1]:.6f}")
        print(f"  Final validation loss: {history.history['val_loss'][-1]:.6f}")
        print(f"  Best epoch: {np.argmin(history.history['val_loss']) + 1}")
        print(f"  Training epochs completed: {len(history.history['loss'])}")
    except Exception as e:
        print(f"  Training error: {e}")
        return None

    return history

def calculate_r2_from_val_loss(val_loss, data_variance):
    """Estimate R² from a validation MSE and the variance of the targets.

    Uses ``R² = 1 - SS_res / SS_tot ≈ 1 - val_loss / var(y)`` and clamps the
    result at 0 from below.

    Args:
        val_loss: Validation loss, assumed to be a mean squared error on the
            same scale as ``data_variance``.
        data_variance: Variance of the target values.

    Returns:
        The estimate, never below 0. When ``data_variance`` is zero, negative
        or NaN the ratio is undefined, so ``0.0`` is returned instead of
        raising ``ZeroDivisionError`` or reporting a value above 1.
    """
    # ``not (x > 0)`` also catches NaN, which compares False to everything.
    if not data_variance > 0:
        return 0.0

    r2_estimate = max(0, 1 - (val_loss / data_variance))
    return r2_estimate

# =============================================================================
# ENHANCED ENSEMBLE METHODS FOR 85%+ ACCURACY
# =============================================================================

def build_super_ensemble_models(input_shape, output_size, num_models=5):
    """Build up to ``num_models`` ensemble members from a fixed catalogue.

    The catalogue holds five architectures (deep attention, hybrid CNN-LSTM,
    transformer-LSTM, bidirectional, deep multivariate); the first
    ``num_models`` of them are built with their preset parameters. Members
    whose builder returns ``None`` or raises are reported and skipped.

    Returns:
        list[dict]: One entry per successfully built member with keys
        ``'model'``, ``'name'`` and ``'config'`` (the catalogue entry used).
    """
    print(f"Building SUPER ENSEMBLE with {num_models} diverse models for 85%+ accuracy")

    # (name, builder, preset keyword arguments) for every available member.
    catalogue = (
        ('deep_attention_lstm', build_deep_attention_lstm_model,
         {'lstm_units': [128, 64, 32], 'attention_units': 64}),
        ('hybrid_cnn_lstm', build_hybrid_cnn_lstm_model,
         {'conv_filters': [64, 32], 'lstm_units': [128, 64]}),
        ('transformer_lstm', build_transformer_lstm_model,
         {'lstm_units': [128, 64], 'num_heads': 8}),
        ('bidirectional_lstm', build_bidirectional_lstm_model,
         {'lstm_units': [128, 64]}),
        ('deep_multivariate_lstm', build_deep_multivariate_lstm_model,
         {'lstm_units': [128, 64, 32]}),
    )
    model_configs = [
        {'name': member_name, 'builder': builder, 'params': preset}
        for member_name, builder, preset in catalogue
    ]

    ensemble_models = []
    selected = model_configs[:num_models]

    for i, config in enumerate(selected):
        print(f"  Building ensemble member {i+1}: {config['name']}")

        try:
            build = config['builder']
            model = build(input_shape=input_shape, output_size=output_size,
                          **config['params'])

            if model is None:
                print(f"    ✗ {config['name']} failed to build")
                continue

            member = {'model': model, 'name': config['name'], 'config': config}
            ensemble_models.append(member)
            print(f"    ✓ {config['name']} built successfully")

        except Exception as e:
            print(f"    ✗ Error building {config['name']}: {e}")

    print(f"  SUPER ENSEMBLE built with {len(ensemble_models)} diverse models")
    return ensemble_models

def build_bidirectional_lstm_model(input_shape, output_size, lstm_units=[128, 64],
                                  dropout_rate=0.1, learning_rate=0.0005):
    """Build and compile a stacked bidirectional LSTM regressor.

    Each entry of ``lstm_units`` adds a ``Bidirectional(LSTM)``; all but the
    last return sequences and are followed by BatchNormalization. Two dense
    blocks (128, 64) lead to a linear output of ``output_size``.

    Returns:
        The compiled model (Adam, ``mse`` loss, ``mae``/``mape`` metrics), or
        ``None`` when TensorFlow is unavailable or building raises.
    """
    print(f"Building BIDIRECTIONAL LSTM model")

    if not TENSORFLOW_AVAILABLE:
        return None

    try:
        inputs = Input(shape=input_shape, name='bidirectional_input')

        # Recurrent stack: only the final layer collapses the time axis.
        final_index = len(lstm_units) - 1
        x = inputs
        for i, units in enumerate(lstm_units):
            return_sequences = i < final_index

            inner_lstm = LSTM(
                units=units,
                return_sequences=return_sequences,
                dropout=dropout_rate,
                recurrent_dropout=dropout_rate,
                name=f'lstm_layer_{i+1}'
            )
            x = Bidirectional(inner_lstm, name=f'bidirectional_{i+1}')(x)

            if return_sequences:
                x = BatchNormalization(name=f'bn_{i+1}')(x)
            print(f"  Bidirectional LSTM Layer {i+1}: {units*2} units (forward + backward)")

        # Dense head, applied sequentially.
        dense_head = (
            Dense(128, activation='relu', name='dense_1'),
            Dropout(dropout_rate, name='dropout_1'),
            BatchNormalization(name='bn_dense_1'),
            Dense(64, activation='relu', name='dense_2'),
            Dropout(dropout_rate, name='dropout_2'),
            Dense(output_size, activation='linear', name='bidirectional_output'),
        )
        outputs = x
        for head_layer in dense_head:
            outputs = head_layer(outputs)

        model = Model(inputs=inputs, outputs=outputs, name='BidirectionalLSTM')
        model.compile(
            optimizer=optimizers.Adam(learning_rate=learning_rate),
            loss='mse',
            metrics=['mae', 'mape'],
        )

        print(f"  Bidirectional LSTM compiled successfully")
        return model

    except Exception as e:
        print(f"Error building Bidirectional LSTM: {e}")
        return None

def build_deep_multivariate_lstm_model(input_shape, output_size, lstm_units=(128, 64, 32),
                                      dropout_rate=0.1, learning_rate=0.0005):
    """Build and compile a three-branch stacked LSTM regression model.

    Three parallel LSTM stacks read the same input. Branch ``b`` (0-based)
    uses ``units + 16 * b`` units per layer and ``dropout_rate + 0.02 * b``
    input dropout, so the branches differ slightly. Their final outputs are
    concatenated and passed through a 256/128/64 ReLU dense head to a linear
    output layer. The model is compiled with Adam, MSE loss and MAE/MAPE
    metrics.

    Args:
        input_shape: Shape passed to ``Input(shape=...)``, excluding the batch
            dimension (the LSTM layers expect ``(timesteps, features)``).
        output_size: Number of units in the final linear layer.
        lstm_units: Non-empty sequence with the base unit count of each LSTM
            layer in a branch.
        dropout_rate: Base rate for LSTM input dropout; also used unchanged
            for recurrent dropout and the dense-head ``Dropout`` layers.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The compiled Keras ``Model``, or ``None`` when TensorFlow is not
        available or the model could not be built (the error is printed).
    """
    print("Building DEEP MULTIVARIATE LSTM model")

    if not TENSORFLOW_AVAILABLE:
        return None

    # Per-branch variation that makes the parallel stacks differ.
    num_branches = 3
    units_step = 16
    dropout_step = 0.02

    try:
        # Materialise once so any iterable works and the length is known.
        layer_sizes = list(lstm_units)
        if not layer_sizes:
            # Without a recurrent layer the branches would pass the raw
            # sequence tensor to the dense head (one prediction per timestep).
            raise ValueError("lstm_units must contain at least one layer size")

        inputs = Input(shape=input_shape, name='deep_multivariate_input')

        # Multiple parallel LSTM branches
        branch_outputs = []
        last_index = len(layer_sizes) - 1

        for branch_idx in range(num_branches):
            extra_units = branch_idx * units_step
            x = inputs

            for i, units in enumerate(layer_sizes):
                # Only the last layer of a branch collapses the sequence.
                return_sequences = i < last_index

                x = LSTM(
                    units=units + extra_units,
                    return_sequences=return_sequences,
                    dropout=dropout_rate + branch_idx * dropout_step,
                    recurrent_dropout=dropout_rate,
                    name=f'branch_{branch_idx}_lstm_{i+1}'
                )(x)

                if return_sequences:
                    x = BatchNormalization(name=f'branch_{branch_idx}_bn_{i+1}')(x)

            branch_outputs.append(x)
            print(f"  Branch {branch_idx + 1}: {[u + extra_units for u in layer_sizes]} units")

        # Combine branches
        combined = Concatenate(name='combine_branches')(branch_outputs)

        # Dense head
        x = Dense(256, activation='relu', name='dense_1')(combined)
        x = Dropout(dropout_rate, name='dropout_1')(x)
        x = BatchNormalization(name='bn_1')(x)

        x = Dense(128, activation='relu', name='dense_2')(x)
        x = Dropout(dropout_rate, name='dropout_2')(x)
        x = BatchNormalization(name='bn_2')(x)

        x = Dense(64, activation='relu', name='dense_3')(x)
        x = Dropout(dropout_rate, name='dropout_3')(x)

        outputs = Dense(output_size, activation='linear', name='deep_multivariate_output')(x)

        model = Model(inputs=inputs, outputs=outputs, name='DeepMultivariateLSTM')

        optimizer = optimizers.Adam(learning_rate=learning_rate)
        model.compile(optimizer=optimizer, loss='mse', metrics=['mae', 'mape'])

        print("  Deep Multivariate LSTM compiled successfully")
        print(f"  Total parameters: {model.count_params():,}")
        return model

    except Exception as e:
        # Builders in this file report failure by returning None rather than
        # raising, so callers can skip models that could not be constructed.
        print(f"Error building Deep Multivariate LSTM: {e}")
        return None

# =============================================================================
# MAIN EXECUTION PIPELINE FOR 85%+ ACCURACY
# =============================================================================

def main_optimized_for_accuracy():
    """Run the full load / prepare / train / evaluate pipeline.

    Steps, in order:

    1. Load the dataset with ``load_comprehensive_energy_data`` and, when it
       has more than 50,000 rows, thin it with a fixed stride.
    2. Pick target columns by well-known names (falling back to the first
       three numeric columns) and at most 25 numeric feature columns,
       preferring columns whose names contain weather/calendar/lag keywords.
    3. Build input/target sequences with ``create_multi_variate_sequences``
       and randomly subsample them when they need more than 4 GB.
    4. Split chronologically into 75% train / 15% validation / 10% test.
    5. Fit a ``RobustScaler`` (features) and ``StandardScaler`` (targets) on
       the training split only and apply them to all splits.
    6. Build the models with ``build_super_ensemble_models`` and train each
       with ``train_advanced_model_for_accuracy``.
    7. Evaluate every model on the test split and report the one with the
       highest R².

    Returns:
        tuple: ``(best_model_info, training_results)``.
        ``best_model_info`` is a dict with keys ``name``, ``model``, ``r2``,
        ``mae``, ``rmse`` and ``mape`` for the model with the highest test
        R², or ``None`` if no model could be evaluated.
        ``training_results`` maps each model name to the value returned by
        ``train_advanced_model_for_accuracy`` for it.
    """
    import gc  # local import: only needed for cleanup between training runs

    print("\nEXECUTING OPTIMIZED ADVANCED LSTM PIPELINE FOR 85%+ ACCURACY")
    print("=" * 70)

    # Load data with enhanced features
    print("\nSTEP 1: ENHANCED DATA LOADING AND PREPARATION")
    print("-" * 50)

    energy_data, data_prep_summary = load_comprehensive_energy_data()

    # Check data size - don't sample too aggressively as we need data for accuracy
    print(f"Dataset size: {len(energy_data)} records")

    if len(energy_data) > 50000:
        print("Large dataset detected - moderate sampling to preserve patterns...")
        sample_rate = max(1, len(energy_data) // 30000)  # Less aggressive sampling
        # NOTE(review): keeping every sample_rate-th row changes the spacing
        # between rows, so SEQUENCE_LENGTH and FORECAST_HORIZONS below count
        # rows, which are only hours if the thinned data is hourly - confirm
        # against the source data frequency.
        energy_data = energy_data.iloc[::sample_rate].copy()
        print(f"Sampled dataset size: {len(energy_data)} records (sample rate: 1/{sample_rate})")

    # Enhanced parameters for better accuracy
    SEQUENCE_LENGTH = 168  # Weekly patterns (7 days * 24 hours)
    FORECAST_HORIZONS = [1, 6, 24]  # Multiple horizons
    MAX_FEATURES = 25  # Upper bound on the number of input feature columns

    print(f"ENHANCED PARAMETERS for 85%+ accuracy:")
    print(f"  Sequence length: {SEQUENCE_LENGTH} hours (WEEKLY PATTERNS)")
    print(f"  Forecast horizons: {FORECAST_HORIZONS}")

    # Enhanced target and feature selection
    print(f"Available columns: {len(energy_data.columns)} total")

    # Find targets: for each quantity, take the first alias present as a column
    possible_target_names = {
        'energy_demand': ['energy_demand', 'demand', 'load', 'consumption'],
        'solar_generation': ['solar_generation', 'solar', 'pv', 'photovoltaic'],
        'wind_generation': ['wind_generation', 'wind']
    }

    actual_targets = []
    for possible_names in possible_target_names.values():
        for name in possible_names:
            if name in energy_data.columns:
                actual_targets.append(name)
                print(f"Found target: {name}")
                break

    if not actual_targets:
        numeric_cols = energy_data.select_dtypes(include=[np.number]).columns.tolist()
        actual_targets = numeric_cols[:3]
        print(f"Using first 3 numeric columns as targets: {actual_targets}")

    # Enhanced feature selection - use MORE features for better accuracy
    numeric_dtypes = [np.float64, np.float32, np.int64, np.int32]
    available_features = [
        col for col in energy_data.columns
        if energy_data[col].dtype in numeric_dtypes and col not in actual_targets
    ]

    # Use more features for higher accuracy (up to MAX_FEATURES)
    if len(available_features) > MAX_FEATURES:
        # Prioritize important features
        priority_keywords = ('temp', 'wind', 'solar', 'hour', 'day', 'lag', 'rolling', 'renewable')
        important_features = [
            col for col in available_features
            if any(keyword in col.lower() for keyword in priority_keywords)
        ]

        # Fill remaining slots with the other features. Truncating the
        # combined list enforces the cap even when the prioritised columns
        # alone exceed it (a "MAX - len(important)" slice bound would go
        # negative in that case and select the wrong columns).
        remaining_features = [col for col in available_features if col not in important_features]
        available_features = (important_features + remaining_features)[:MAX_FEATURES]

        print(f"Selected {len(available_features)} ENHANCED features for higher accuracy")

    print(f"Selected targets: {actual_targets}")
    print(f"Selected features: {len(available_features)} features")

    # Create enhanced sequences
    print("\nSTEP 2: ENHANCED SEQUENCE CREATION")
    print("-" * 40)

    X_mv_sequences, y_mv_multi_horizon, mv_timestamps = create_multi_variate_sequences(
        energy_data, actual_targets, available_features,
        SEQUENCE_LENGTH, FORECAST_HORIZONS
    )

    # Check memory and proceed more conservatively
    memory_required_gb = (X_mv_sequences.nbytes + y_mv_multi_horizon.nbytes) / (1024**3)
    print(f"Memory required: {memory_required_gb:.2f} GB")

    if memory_required_gb > 4.0:  # More generous memory allowance
        print("High memory usage - selective sampling...")
        sample_size = min(len(X_mv_sequences), 15000)  # Keep more data
        indices = np.random.choice(len(X_mv_sequences), sample_size, replace=False)
        # Sort so the subsample stays in chronological order for the split below
        indices = np.sort(indices)

        X_mv_sequences = X_mv_sequences[indices]
        y_mv_multi_horizon = y_mv_multi_horizon[indices]
        mv_timestamps = mv_timestamps[indices]

        print(f"Sampled to {len(X_mv_sequences)} sequences")

    # Enhanced data splits for better training
    print("\nSTEP 3: ENHANCED DATA SPLITTING")
    print("-" * 35)

    total_sequences = len(X_mv_sequences)
    train_size = int(0.75 * total_sequences)  # More training data
    val_size = int(0.15 * total_sequences)
    val_end = train_size + val_size

    # Contiguous, ordered slices: train first, then validation, then test
    X_train = X_mv_sequences[:train_size]
    y_train = y_mv_multi_horizon[:train_size]

    X_val = X_mv_sequences[train_size:val_end]
    y_val = y_mv_multi_horizon[train_size:val_end]

    X_test = X_mv_sequences[val_end:]
    y_test = y_mv_multi_horizon[val_end:]

    print(f"ENHANCED splits for 85%+ accuracy:")
    print(f"  Training: {len(X_train)} sequences (75%)")
    print(f"  Validation: {len(X_val)} sequences (15%)")
    print(f"  Test: {len(X_test)} sequences (10%)")

    # Enhanced normalization
    print("\nSTEP 4: ENHANCED DATA NORMALIZATION")
    print("-" * 40)

    # More sophisticated normalization
    feature_scaler = RobustScaler()  # Better for outliers
    target_scaler = StandardScaler()

    # The scalers work on 2-D data, so the sequence arrays are flattened to
    # (rows, features) for scaling and reshaped back afterwards. Both scalers
    # are fitted on the training split only.
    original_shape = X_train.shape
    X_train_reshaped = X_train.reshape(-1, X_train.shape[-1])

    feature_scaler.fit(X_train_reshaped)

    X_train_scaled = feature_scaler.transform(X_train_reshaped).reshape(original_shape)
    X_val_scaled = feature_scaler.transform(X_val.reshape(-1, X_val.shape[-1])).reshape(X_val.shape)
    X_test_scaled = feature_scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

    y_train_scaled = target_scaler.fit_transform(y_train)
    y_val_scaled = target_scaler.transform(y_val)
    y_test_scaled = target_scaler.transform(y_test)

    print(f"Enhanced normalization completed")
    print(f"  Feature scaling: RobustScaler (better outlier handling)")
    print(f"  Target scaling: StandardScaler")

    # Calculate data variance for R² estimation
    data_variance = np.var(y_val_scaled)
    print(f"  Data variance for R² estimation: {data_variance:.6f}")

    # Build enhanced models
    print("\nSTEP 5: BUILDING ENHANCED MODEL ARCHITECTURES")
    print("-" * 50)

    input_shape = (SEQUENCE_LENGTH, len(available_features))
    output_size = len(actual_targets) * len(FORECAST_HORIZONS)

    print(f"Enhanced model configuration:")
    print(f"  Input shape: {input_shape}")
    print(f"  Output size: {output_size}")
    print(f"  Architecture focus: DEEP networks for 85%+ accuracy")

    # Build super ensemble
    ensemble_models = build_super_ensemble_models(input_shape, output_size, num_models=3)

    print("Enhanced models built successfully")

    # Enhanced training
    print("\nSTEP 6: ENHANCED MODEL TRAINING FOR 85%+ ACCURACY")
    print("-" * 55)

    training_results = {}

    # Extended training parameters
    ENHANCED_EPOCHS = 150  # Much longer training
    ENHANCED_BATCH_SIZE = 16  # Smaller for better gradients

    print(f"ENHANCED TRAINING CONFIGURATION:")
    print(f"  Epochs: {ENHANCED_EPOCHS} (vs previous 20)")
    print(f"  Batch size: {ENHANCED_BATCH_SIZE} (smaller for better gradients)")
    print(f"  Advanced callbacks: Extended patience, cyclical LR")

    # Train each model in the ensemble
    for i, model_info in enumerate(ensemble_models):
        model_name = model_info['name']
        model = model_info['model']

        print(f"\n--- Training Enhanced Model {i+1}: {model_name} ---")

        history = train_advanced_model_for_accuracy(
            model=model,
            model_name=model_name,
            X_train=X_train_scaled,
            y_train=y_train_scaled,
            X_val=X_val_scaled,
            y_val=y_val_scaled,
            epochs=ENHANCED_EPOCHS,
            batch_size=ENHANCED_BATCH_SIZE
        )

        training_results[model_name] = history

        # Estimate R² from validation loss
        if history and 'val_loss' in history.history:
            best_val_loss = min(history.history['val_loss'])
            estimated_r2 = calculate_r2_from_val_loss(best_val_loss, data_variance)
            print(f"  ESTIMATED R² for {model_name}: {estimated_r2:.4f} ({estimated_r2*100:.1f}%)")

            if estimated_r2 >= 0.85:
                print(f"  🎉 TARGET ACHIEVED! {model_name} reached 85%+ accuracy!")
            elif estimated_r2 >= 0.80:
                print(f"  🔥 EXCELLENT! {model_name} achieved 80%+ accuracy!")
            else:
                print(f"  📈 GOOD PROGRESS! {model_name} achieved {estimated_r2*100:.1f}% accuracy")

        # Memory management
        gc.collect()

    # Enhanced evaluation
    print("\nSTEP 7: ENHANCED MODEL EVALUATION")
    print("-" * 40)

    best_model_info = None
    # R² can be negative (worse than predicting the mean), so start below any
    # attainable score; otherwise no model would be reported as best when all
    # of them score <= 0.
    best_r2 = float('-inf')

    print("Evaluating all trained models for best performance...")

    for model_info in ensemble_models:
        model_name = model_info['name']
        model = model_info['model']

        try:
            # Make predictions and map both sides back to original units
            y_pred_scaled = model.predict(X_test_scaled, verbose=0)
            y_pred = target_scaler.inverse_transform(y_pred_scaled)
            y_true = target_scaler.inverse_transform(y_test_scaled)

            # Metrics are pooled over all outputs by flattening
            y_true_flat = y_true.flatten()
            y_pred_flat = y_pred.flatten()

            # Calculate comprehensive metrics
            mae = mean_absolute_error(y_true_flat, y_pred_flat)
            mse = mean_squared_error(y_true_flat, y_pred_flat)
            rmse = np.sqrt(mse)
            r2 = r2_score(y_true_flat, y_pred_flat)
            # Small epsilon avoids division by zero for zero-valued targets
            mape = np.mean(np.abs((y_true_flat - y_pred_flat) / (y_true_flat + 1e-8))) * 100

            print(f"\n{model_name} TEST RESULTS:")
            print(f"  MAE: {mae:.2f}")
            print(f"  RMSE: {rmse:.2f}")
            print(f"  MAPE: {mape:.2f}%")
            print(f"  R²: {r2:.4f} ({r2*100:.1f}%)")

            if r2 > best_r2:
                best_r2 = r2
                best_model_info = {
                    'name': model_name,
                    'model': model,
                    'r2': r2,
                    'mae': mae,
                    'rmse': rmse,
                    'mape': mape
                }

            # Achievement badges
            if r2 >= 0.90:
                print(f"  🏆 OUTSTANDING! 90%+ accuracy achieved!")
            elif r2 >= 0.85:
                print(f"  TARGET ACHIEVED! 85%+ accuracy reached!")
            elif r2 >= 0.80:
                print(f"  EXCELLENT! 80%+ accuracy achieved!")
            else:
                print(f"  Good progress: {r2*100:.1f}% accuracy")

        except Exception as e:
            # A failing model must not abort evaluation of the others
            print(f"  Error evaluating {model_name}: {e}")

    # Final results
    print(f"\n" + "="*70)
    print("ENHANCED LSTM TRAINING COMPLETED - ACCURACY RESULTS")
    print("="*70)

    if best_model_info:
        print(f"\n BEST MODEL: {best_model_info['name']}")
        print(f"   R² Accuracy: {best_model_info['r2']:.4f} ({best_model_info['r2']*100:.1f}%)")
        print(f"   MAE: {best_model_info['mae']:.2f}")
        print(f"   RMSE: {best_model_info['rmse']:.2f}")
        print(f"   MAPE: {best_model_info['mape']:.2f}%")

        if best_model_info['r2'] >= 0.85:
            print(f"\n TARGET OF 85%+ ACCURACY ACHIEVED!")
            print(f"Your model has reached {best_model_info['r2']*100:.1f}% R² accuracy!")
        elif best_model_info['r2'] >= 0.80:
            print(f"\n 80%+ accuracy achieved!")
            print(f"Very close to 85% target - try extended training for final push!")
        else:
            print(f"Progress report! {best_model_info['r2']*100:.1f}% accuracy achieved!")
            print(f"Recommendations for reaching 85%:")
            print(f"  • Increase training epochs to 200+")
            print(f"  • Add more diverse features")
            print(f"  • Try ensemble averaging")

    print(f"\nOptimizations Applied:")
    print(f"  ✓ Extended training: {ENHANCED_EPOCHS} epochs")
    print(f"  ✓ Enhanced features: {len(available_features)} variables")
    print(f"  ✓ Weekly sequence patterns: {SEQUENCE_LENGTH} hours")
    print(f"  ✓ Advanced architectures: Deep, Hybrid, Transformer models")
    print(f"  ✓ Sophisticated callbacks: Cyclical LR, extended patience")

    return best_model_info, training_results

# Execute the optimized pipeline only when this file is run as a script
# (or notebook cell), not when it is imported. The returned best-model
# summary and per-model training results are bound to module-level names.
if __name__ == "__main__":
    best_model, results = main_optimized_for_accuracy()

EnergyNexus Advanced LSTM Architectures - OPTIMIZED FOR 85%+ ACCURACY
Development started: 2025-08-08 01:35:29
TensorFlow version: 2.19.0

EXECUTING OPTIMIZED ADVANCED LSTM PIPELINE FOR 85%+ ACCURACY

STEP 1: ENHANCED DATA LOADING AND PREPARATION
--------------------------------------------------
Successfully loaded processed energy dataset
Adding advanced features for enhanced LSTM performance...
Enhanced dataset shape after feature engineering: (201604, 64)
Data source: Processed pipeline data
Dataset shape: (201604, 64)
Date range: 2014-12-31 23:00:00+00:00 to 2020-09-30 23:45:00+00:00
Data completeness: 100.0%
Dataset size: 201604 records
Large dataset detected - moderate sampling to preserve patterns...
Sampled dataset size: 33601 records (sample rate: 1/6)
ENHANCED PARAMETERS for 85%+ accuracy:
  Sequence length: 168 hours (WEEKLY PATTERNS)
  Forecast horizons: [1, 6, 24]
Available columns: 64 total
Using first 3 numeric columns as targets: ['AT_load_actual_entsoe_transparency', 

KeyboardInterrupt: 