# ERCOT Price Forecasting Model Building and Visualization Demo

This script demonstrates how to build, train, and evaluate the hybrid forecasting model
for ERCOT electricity prices, and how to visualize the results.

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from datetime import datetime, timedelta
from typing import Dict, List, Tuple, Optional, Union, Any

# Put the parent directory on sys.path (once) so the `src.*` imports below
# can be resolved when this notebook runs from its own folder.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# Use the ggplot look for matplotlib figures
plt.style.use('ggplot')
# %matplotlib inline

# Import our modules
from src.data.ercot_price_data import ErcotPriceData
from src.data.ercot_weather_data import ErcotWeatherData
from src.models.hybrid_model import HybridModel
from src.utils.preprocessing import (
    align_time_series,
    create_time_features,
    create_lag_features,
    create_rolling_features,
    prepare_data_for_model
)
from src.visualization.plotting import (
    plot_price_forecast,
    plot_volatility_forecast,
    plot_price_components,
    plot_model_performance
)

## Data Loading and Preparation

First, let's load the price and weather data for the Houston hub.

In [None]:
# Analysis window: the 365 days ending now, with the final 30 days held out
# as the test period.
end_date = datetime.now()
start_date = end_date - timedelta(days=365)
test_start_date = end_date - timedelta(days=30)

# Both loaders are queried over the same window with the same 'H' resampling.
analysis_window = dict(start_date=start_date, end_date=end_date)

# Houston hub prices
price_data = ErcotPriceData().load_data(
    **analysis_window, price_node='HB_HOUSTON', resample_freq='H'
)

# Houston weather
weather_data = ErcotWeatherData().load_data(
    **analysis_window, location='Houston', resample_freq='H'
)

# Report the size of each loaded frame
for label, frame in (("Price", price_data), ("Weather", weather_data)):
    print(f"{label} data shape: {frame.shape}")

## Data Preprocessing

Let's preprocess the data to align the time series and create features.

In [None]:
# Stage 1 - combine the price and weather frames via align_time_series
aligned_data = align_time_series(price_data, weather_data)
print(f"Aligned data shape: {aligned_data.shape}")

# Stage 2 - add time features and list the resulting columns
data_with_time_features = create_time_features(aligned_data)
time_feature_shape = data_with_time_features.shape
time_feature_columns = data_with_time_features.columns.tolist()
print(f"Data with time features shape: {time_feature_shape}")
print(f"Columns: {time_feature_columns}")

# Stage 3 - price lags of 1, 24 and 168 hours
lag_settings = dict(target_column='price', lag_periods=[1, 24, 168])
data_with_lag_features = create_lag_features(data_with_time_features, **lag_settings)
print(f"Data with lag features shape: {data_with_lag_features.shape}")

# Stage 4 - rolling mean and std of the price over 24- and 168-hour windows
rolling_settings = dict(
    target_column='price',
    windows=[24, 168],
    functions=['mean', 'std'],
)
data_with_rolling_features = create_rolling_features(data_with_lag_features, **rolling_settings)
print(f"Data with rolling features shape: {data_with_rolling_features.shape}")

# Stage 5 - discard every row that still contains a NaN (e.g. rows whose lag
# values are not available yet)
preprocessed_data = data_with_rolling_features.dropna()
print(f"Preprocessed data shape (after dropping NaN): {preprocessed_data.shape}")

## Split Data into Training and Testing Sets

In [None]:
# Chronological split: rows stamped before test_start_date are used for
# training, rows at or after it for testing.
train_mask = preprocessed_data.index < test_start_date
test_mask = preprocessed_data.index >= test_start_date
train_data = preprocessed_data.loc[train_mask]
test_data = preprocessed_data.loc[test_mask]

print(f"Training data shape: {train_data.shape}")
print(f"Testing data shape: {test_data.shape}")

# Target: the price column, kept as a single-column DataFrame
train_price = train_data[['price']]
test_price = test_data[['price']]

# Inputs: every remaining column (weather plus engineered features)
feature_columns = train_data.columns[train_data.columns != 'price'].tolist()
train_features = train_data[feature_columns]
test_features = test_data[feature_columns]

## Model Configuration

Now let's configure our hybrid model (training happens in the next section). We'll set up a model with:
- 48-hour sequence length
- 24-hour forecast horizon
- GARCH(1,1) volatility model with a zero-mean specification
- Normal error distribution

In [None]:
# Window sizes, in hours: input sequence length and forecast horizon
seq_length, forecast_horizon = 48, 24
# GARCH(p, q) orders
garch_p, garch_q = 1, 1
# GARCH mean model type, volatility model type and error distribution
mean_model, vol_model, dist_model = 'Zero', 'GARCH', 'normal'

# Collect the constructor arguments in one place, then build the hybrid model
hybrid_model_settings = {
    'seq_length': seq_length,
    'forecast_horizon': forecast_horizon,
    'p': garch_p,
    'q': garch_q,
    'mean': mean_model,
    'vol': vol_model,
    'dist': dist_model,
}
model = HybridModel(**hybrid_model_settings)

## Training the Model

Let's train the hybrid model on our training data. This will train both the neural network for price forecasting and the GARCH model for volatility forecasting.

In [None]:
# Fit the hybrid model on the training split
print("Training the hybrid model...")

# Options forwarded to model.fit alongside the training data
nn_training_options = {
    'nn_epochs': 50,
    'nn_batch_size': 32,
    'nn_validation_split': 0.2,
    'verbose': 1,
}
model.fit(price_data=train_price, weather_data=train_features, **nn_training_options)

print("Model training complete!")

## Making Predictions

Now let's use our trained model to make predictions on the test data.

In [None]:
# Forecast over the held-out test period, requesting 95% confidence bounds
print("Generating forecasts...")
prediction_inputs = dict(price_data=test_price, weather_data=test_features)
forecasts = model.predict(**prediction_inputs, confidence_level=0.95)

# Summarise the returned frame and preview its first rows
forecast_columns = forecasts.columns.tolist()
print(f"Forecast shape: {forecasts.shape}")
print(f"Forecast columns: {forecast_columns}")
forecasts.head()

## Visualizing Price Forecasts

Let's visualize the price forecasts compared to the actual prices.

In [None]:
# Draw the forecasts against the test-period prices
price_forecast_title = "ERCOT Houston Hub Price Forecast (24-hour horizon)"
fig_price = plot_price_forecast(
    forecasts=forecasts, historical_data=test_price, title=price_forecast_title
)
fig_price.show()

## Visualizing Volatility Forecasts

Now let's visualize the volatility forecasts.

In [None]:
# Historical volatility: 24-row rolling standard deviation of the test prices,
# with the leading incomplete windows removed
test_price_series = test_price['price']
rolling_day = test_price_series.rolling(window=24)
historical_volatility = rolling_day.std().dropna()

# Variance forecasts from the model output, kept as a one-column DataFrame
# NOTE(review): the historical series is a standard deviation while this
# column is a variance - confirm plot_volatility_forecast reconciles the units.
variance_forecast = forecasts.loc[:, ['variance_forecast']]

# Draw forecast against history
volatility_title = "ERCOT Houston Hub Volatility Forecast"
fig_vol = plot_volatility_forecast(
    variance_forecast=variance_forecast,
    historical_volatility=historical_volatility,
    title=volatility_title,
)
fig_vol.show()

## Visualizing Price Components

Let's visualize the price over the last seven days of the test period alongside the weather and engineered features that accompany it.

In [None]:
# Restrict the component plot to the final 7 days (168 hourly rows) of the test set
lookback_period = 7 * 24
last_week_index = test_price.index[-lookback_period:]
last_week_price, last_week_features = (
    frame.loc[last_week_index] for frame in (test_price, test_features)
)

components_title = "ERCOT Houston Hub Price Components"
fig_components = plot_price_components(
    price_data=last_week_price,
    weather_data=last_week_features,
    lookback_window=lookback_period,
    forecast_window=forecast_horizon,
    title=components_title,
)
fig_components.show()

## Evaluating Model Performance

Let's evaluate our model's performance by comparing forecasted prices to actual prices.

In [None]:
# Extract actual and forecasted prices as Series
actual_prices = test_price['price']
forecasted_prices = forecasts['price_forecast']

# Calculate error metrics.
# The subtraction aligns the two Series on their index labels.
forecast_errors = actual_prices - forecasted_prices
mse = np.mean(forecast_errors ** 2)
rmse = np.sqrt(mse)
mae = np.mean(np.abs(forecast_errors))

# MAPE is undefined where the actual price is exactly zero: the ratio becomes
# infinite and a single such hour would turn the whole average into `inf`.
# Replace zero actuals with NaN so those hours are skipped by the mean.
nonzero_actual_prices = actual_prices.where(actual_prices != 0)
mape = np.mean(np.abs(forecast_errors / nonzero_actual_prices)) * 100  # in percentage

print("Model Performance Metrics:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Plot model performance
fig_performance = plot_model_performance(
    actual_prices=actual_prices,
    forecasted_prices=forecasted_prices,
    train_test_split_date=test_start_date,
    title="ERCOT Houston Hub Forecast Performance"
)
fig_performance.show()

## Analyzing Forecast Errors

Let's analyze the distribution of forecast errors.

In [None]:
# Create a histogram of forecast errors
fig_errors = go.Figure()

# Bar heights are normalised to a probability density so that the normal
# curve below is on the same scale no matter how many bins Plotly chooses.
# Plotly documents `nbinsx` as a maximum bin count, not an exact one, so
# scaling the curve by an assumed bin width of range/30 can mismatch the bars.
fig_errors.add_trace(go.Histogram(
    x=forecast_errors,
    nbinsx=30,
    histnorm='probability density',
    name='Forecast Errors',
    marker_color='blue',
    opacity=0.7
))

# Add a normal distribution curve for reference: the pdf of a normal with the
# sample mean and standard deviation of the errors, evaluated at 100 points
# spanning the observed error range.
x_range = np.linspace(forecast_errors.min(), forecast_errors.max(), 100)
mean_error = forecast_errors.mean()
std_error = forecast_errors.std()
y_norm = (1 / (std_error * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x_range - mean_error) / std_error) ** 2)

fig_errors.add_trace(go.Scatter(
    x=x_range,
    y=y_norm,
    mode='lines',
    name='Normal Distribution',
    line=dict(color='red', dash='dash')
))

fig_errors.update_layout(
    title='Distribution of Forecast Errors',
    xaxis_title='Forecast Error ($/MWh)',
    yaxis_title='Density',
    template='plotly_white',
    height=500,
    width=800
)

fig_errors.show()

## Confidence Intervals

Let's analyze the reliability of our model's confidence intervals.

In [None]:
# Share of test points whose actual price lies inside the forecast interval
# (both bounds inclusive), expressed as a percentage
lower_bound, upper_bound = forecasts['lower_bound'], forecasts['upper_bound']
within_bounds = actual_prices.between(lower_bound, upper_bound)
coverage_rate = 100 * within_bounds.mean()

print(f"Confidence Interval Coverage: {coverage_rate:.2f}%")
print(f"Expected Coverage (95% confidence): 95.00%")

# Gather the four series side by side for plotting
ci_data = pd.DataFrame(dict(
    actual=actual_prices,
    forecast=forecasted_prices,
    lower_bound=lower_bound,
    upper_bound=upper_bound,
))

# Only the final 7 days (168 hourly rows) are plotted
last_7_days = ci_data.iloc[-(7 * 24):]

fig_ci = go.Figure()

# Actual and forecasted prices as solid lines: (column, legend label, colour)
line_specs = (
    ('actual', 'Actual Price', 'black'),
    ('forecast', 'Forecasted Price', 'blue'),
)
for column, label, color in line_specs:
    fig_ci.add_trace(go.Scatter(
        x=last_7_days.index,
        y=last_7_days[column],
        mode='lines',
        name=label,
        line=dict(color=color, width=2)
    ))

# Confidence band drawn as one closed polygon: left-to-right along the upper
# bound, then right-to-left back along the lower bound
timestamps = last_7_days.index.tolist()
upper_edge = last_7_days['upper_bound'].tolist()
lower_edge = last_7_days['lower_bound'].tolist()
fig_ci.add_trace(go.Scatter(
    x=timestamps + timestamps[::-1],
    y=upper_edge + lower_edge[::-1],
    fill='toself',
    fillcolor='rgba(0, 176, 246, 0.2)',
    line=dict(color='rgba(255, 255, 255, 0)'),
    name='95% Confidence Interval'
))

# Horizontal legend placed above the plot area, anchored to the right edge
legend_layout = dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1)
fig_ci.update_layout(
    title='Price Forecast with 95% Confidence Interval (Last 7 Days)',
    xaxis_title='Date',
    yaxis_title='Price ($/MWh)',
    template='plotly_white',
    height=600,
    width=1000,
    legend=legend_layout
)

fig_ci.show()

## Saving and Loading the Model

Let's demonstrate how to save and load the trained model.

In [None]:
# Make sure the output directory exists before writing to it
save_dir = '../models/saved'
os.makedirs(save_dir, exist_ok=True)

# Persist the trained model under a fixed path inside that directory
model_path = save_dir + '/hybrid_model_houston'
model.save_models(model_path)
print(f"Model saved to {model_path}")

# Restore: build a fresh HybridModel with the same configuration, then load
# the saved models into it
reload_settings = {
    'seq_length': seq_length,
    'forecast_horizon': forecast_horizon,
    'p': garch_p,
    'q': garch_q,
    'mean': mean_model,
    'vol': vol_model,
    'dist': dist_model,
}
loaded_model = HybridModel(**reload_settings)
loaded_model.load_models(model_path)
print("Model loaded successfully!")

## Conclusion

In this script, we've demonstrated the complete workflow for building, training, and evaluating a hybrid forecasting model for ERCOT electricity prices:

1. Loading and preprocessing price and weather data
2. Configuring and training the hybrid model
3. Making price and volatility forecasts
4. Visualizing the results using various plots
5. Evaluating model performance with multiple metrics
6. Saving and loading the trained model

The hybrid model combines the strengths of neural networks for capturing complex patterns in the price series and GARCH models for modeling volatility, providing accurate forecasts with uncertainty estimates. 