# ERCOT Gridstatus Integration Demo

This notebook demonstrates how to use the gridstatus library integration with our ERCOT price forecasting project to fetch real data from ERCOT.

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from datetime import datetime, timedelta

# Make the parent directory importable so the `src.*` packages below resolve
# when the notebook is run from its own folder.
module_path = os.path.abspath(os.pardir)
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)

# Set plotting style
# Applies the 'ggplot' style sheet to matplotlib figures only.
# NOTE(review): every figure built in the cells visible here uses Plotly (go.Figure),
# so this presumably matters only for matplotlib-based helpers such as the imported
# src.visualization.plotting functions — confirm, or drop if unused.
plt.style.use('ggplot')
# IPython line magic: render matplotlib figures inline in the cell output.
%matplotlib inline

# Import our modules
from src.data.ercot_gridstatus_integration import ErcotGridstatusIntegration
from src.data.ercot_price_data import ErcotPriceData
from src.data.ercot_weather_data import ErcotWeatherData
from src.visualization.plotting import (
    plot_price_forecast, 
    plot_volatility_forecast, 
    plot_price_components, 
    plot_model_performance
)

## Initialize the ERCOT Gridstatus Integration

We'll create an instance of the `ErcotGridstatusIntegration` class to fetch real data from ERCOT. If you have an ERCOT API key, you can provide it here, but it's not necessary for most operations.

In [None]:
# Client used for every real-data fetch in this notebook (constructed with its defaults)
ercot_gridstatus = ErcotGridstatusIntegration()

# Synthetic generators, kept around as the fallback source when a real fetch fails
ercot_price, ercot_weather = ErcotPriceData(), ErcotWeatherData()

## Fetching Real ERCOT Price Data

Let's fetch real price data for the last week from ERCOT.

In [None]:
# Define date range (last 7 days); these two names are reused by every fetch below.
# NOTE(review): datetime.now() is timezone-naive local time — confirm the fetchers expect that.
end_date = datetime.now()
start_date = end_date - timedelta(days=7)

# Fetch real-time price data for Houston Hub.
# Any failure in the fetch (or in summarising its result) triggers the synthetic fallback,
# so `real_price_data` is always defined for the cells that follow.
try:
    real_price_data = ercot_gridstatus.fetch_price_data(
        start_date=start_date,
        end_date=end_date,
        price_node='HB_HOUSTON',
        market='real_time',
        resample_freq='H'  # Resample to hourly for easier visualization
    )

    print("Successfully fetched real ERCOT price data")
    print(f"Shape: {real_price_data.shape}")
    print(f"Date range: {real_price_data.index.min()} to {real_price_data.index.max()}")
    print(f"Price range: ${real_price_data['price'].min():.2f} to ${real_price_data['price'].max():.2f} per MWh")
    print(f"Average price: ${real_price_data['price'].mean():.2f} per MWh")
except Exception as e:
    print(f"Error fetching real price data: {e}")
    # Fallback to synthetic data if real data fetch fails
    print("Falling back to synthetic data...")
    real_price_data = ercot_price.load_data(
        start_date=start_date,
        end_date=end_date,
        price_node='HB_HOUSTON',
        resample_freq='H'
    )
    print("Successfully generated synthetic price data")

# Display the first few rows. This must be the cell's last top-level expression:
# Jupyter does not render an expression that sits inside a try/except body.
real_price_data.head()

## Fetching Real ERCOT Weather Data

Now, let's fetch weather data for the same period.

In [None]:
# Fetch weather data for Houston over the same window as the price data.
# Any failure in the fetch (or in summarising its result) triggers the synthetic fallback,
# so `real_weather_data` is always defined for the cells that follow.
try:
    real_weather_data = ercot_gridstatus.fetch_weather_data(
        start_date=start_date,
        end_date=end_date,
        location='Houston',
        resample_freq='H'  # Hourly data
    )

    print("Successfully fetched real ERCOT weather data")
    print(f"Shape: {real_weather_data.shape}")
    print(f"Date range: {real_weather_data.index.min()} to {real_weather_data.index.max()}")
    print(f"Temperature range: {real_weather_data['temperature'].min():.1f} to {real_weather_data['temperature'].max():.1f} °C")
    print(f"Average temperature: {real_weather_data['temperature'].mean():.1f} °C")
except Exception as e:
    print(f"Error fetching real weather data: {e}")
    # Fallback to synthetic data if real data fetch fails
    print("Falling back to synthetic data...")
    real_weather_data = ercot_weather.load_data(
        start_date=start_date,
        end_date=end_date,
        location='Houston',
        resample_freq='H'
    )
    print("Successfully generated synthetic weather data")

# Display the first few rows. This must be the cell's last top-level expression:
# Jupyter does not render an expression that sits inside a try/except body.
real_weather_data.head()

## Visualizing the Real Price Data

Let's create a plot to visualize the real price data.

In [None]:
# Line trace of the hourly Houston Hub price series
price_trace = go.Scatter(
    x=real_price_data.index,
    y=real_price_data['price'],
    mode='lines',
    name='ERCOT Houston Hub Price',
    line={'color': 'blue', 'width': 2},
)

# Figure-level presentation settings
price_layout = {
    'title': 'Real-Time ERCOT Houston Hub Prices',
    'xaxis_title': 'Date',
    'yaxis_title': 'Price ($/MWh)',
    'height': 600,
    'width': 1000,
    'template': 'plotly_white',
}

fig = go.Figure(data=[price_trace])
fig.update_layout(**price_layout)
fig.show()

## Visualizing Weather Data

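Now let's plot the weather variables (temperature, wind speed, and solar irradiance) over the same period. We'll quantify how they relate to price in the correlation analysis below.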

In [None]:
def plot_weather_series(weather_df, column, title, axis_label, color):
    """Build a single-trace Plotly line chart for one weather column.

    Args:
        weather_df: DataFrame whose index supplies the x-axis values.
        column: Name of the column in ``weather_df`` to plot.
        title: Figure title.
        axis_label: Text used for both the y-axis title and the trace name.
        color: Line colour for the trace.

    Returns:
        The configured ``go.Figure`` (not yet shown).

    Raises:
        KeyError: If ``column`` is not present in ``weather_df``.
    """
    figure = go.Figure()
    figure.add_trace(go.Scatter(
        x=weather_df.index,
        y=weather_df[column],
        mode='lines',
        name=axis_label,
        line=dict(color=color, width=2)
    ))
    figure.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title=axis_label,
        height=400,
        width=1000,
        template='plotly_white'
    )
    return figure


# One standalone figure per weather variable: (column, figure title, axis label, line colour)
weather_panels = [
    ('temperature', 'Temperature in Houston', 'Temperature (°C)', 'red'),
    ('wind_speed', 'Wind Speed in Houston', 'Wind Speed (m/s)', 'green'),
    ('solar_irradiance', 'Solar Irradiance in Houston', 'Solar Irradiance (W/m²)', 'orange'),
]

for column, title, axis_label, color in weather_panels:
    # Skip a variable the data source did not provide instead of aborting the
    # whole cell on the first KeyError.
    if column not in real_weather_data.columns:
        print(f"Skipping '{column}': column not present in weather data")
        continue
    plot_weather_series(real_weather_data, column, title, axis_label, color).show()

## Fuel Mix Analysis

Let's fetch and visualize the fuel mix data to understand how different generation types are contributing to the grid.

In [None]:
# Fetch fuel mix data.
# Only the fetch itself is guarded: a failure while summarising or plotting should surface
# as a real traceback rather than being reported as a fetch error.
try:
    fuel_mix_data = ercot_gridstatus.fetch_fuel_mix(
        start_date=start_date,
        end_date=end_date
    )
except Exception as e:
    print(f"Error fetching fuel mix data: {e}")
    # No fallback for fuel mix as we don't have synthetic data for it
else:
    print("Successfully fetched ERCOT fuel mix data")
    print(f"Shape: {fuel_mix_data.shape}")
    print(f"Date range: {fuel_mix_data.index.min()} to {fuel_mix_data.index.max()}")
    # map(str, ...) keeps the join safe if any column label is not a string
    print(f"Available fuel types: {', '.join(map(str, fuel_mix_data.columns))}")

    # Display the first few rows. A bare `.head()` inside a block is never rendered,
    # so use display(), which IPython provides as a built-in in notebook kernels.
    display(fuel_mix_data.head())

    # Get the main generation types (exclude less common ones for clarity)
    # NOTE(review): these labels must match the column names returned by fetch_fuel_mix — confirm.
    main_fuels = ['Wind', 'Solar', 'Natural Gas', 'Coal', 'Nuclear']
    available_fuels = [fuel for fuel in main_fuels if fuel in fuel_mix_data.columns]

    if not available_fuels:
        print("None of the main fuel types are present in the fuel mix data; skipping plot")
    else:
        # Plot the fuel mix as a stacked area chart
        fig = go.Figure()

        # All traces share one stackgroup so they stack on top of each other
        for fuel in available_fuels:
            fig.add_trace(go.Scatter(
                x=fuel_mix_data.index,
                y=fuel_mix_data[fuel],
                mode='lines',
                name=fuel,
                stackgroup='one'
            ))

        # Update layout
        fig.update_layout(
            title='ERCOT Generation by Fuel Type',
            xaxis_title='Date',
            yaxis_title='Generation (MW)',
            height=600,
            width=1000,
            template='plotly_white',
            legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1)
        )

        fig.show()

## Correlation Analysis

Let's analyze the correlation between prices and weather variables to understand the relationship.

In [None]:
# Restrict both frames to the timestamps they share, then place the price
# column next to the weather columns in a single frame.
weather_columns = ['temperature', 'wind_speed', 'solar_irradiance', 'humidity']
common_index = real_price_data.index.intersection(real_weather_data.index)
combined_data = pd.concat(
    [
        real_price_data.loc[common_index, ['price']],
        real_weather_data.loc[common_index, weather_columns],
    ],
    axis=1,
)

# Pairwise correlation between every pair of columns
correlation_matrix = combined_data.corr()

print("Correlation Matrix:")
print(correlation_matrix)

# Heatmap of the matrix, with each cell annotated by its value rounded to 2 decimals
heatmap_trace = go.Heatmap(
    z=correlation_matrix.values,
    x=correlation_matrix.columns,
    y=correlation_matrix.index,
    colorscale='RdBu',
    zmin=-1,
    zmax=1,
    text=correlation_matrix.round(2).values,
    texttemplate="%{text}",
    showscale=True,
)

fig = go.Figure(data=[heatmap_trace])
fig.update_layout(
    title='Correlation Matrix: Price vs Weather Variables',
    height=500,
    width=700,
    template='plotly_white',
)
fig.show()

## Price Comparison Across Nodes

Let's compare prices across different ERCOT nodes.

In [None]:
# Report the price nodes the integration exposes
price_nodes = [*ercot_gridstatus.get_available_price_nodes().keys()]
print("Available price nodes: " + ', '.join(price_nodes))

# Hubs compared in this section
nodes_to_compare = ['HB_HOUSTON', 'HB_NORTH', 'HB_WEST']

# Collect one hourly real-time price series per hub; a hub whose fetch fails is
# reported and left out of the comparison.
prices_by_node = {}
for node in nodes_to_compare:
    fetch_kwargs = dict(
        start_date=start_date,
        end_date=end_date,
        price_node=node,
        market='real_time',
        resample_freq='H',
    )
    try:
        prices_by_node[node] = ercot_gridstatus.fetch_price_data(**fetch_kwargs)['price']
    except Exception as e:
        print(f"Error fetching data for {node}: {e}")
    else:
        print(f"Successfully fetched price data for {node}")

if not prices_by_node:
    print("No price data available for comparison")
else:
    # One column per hub, aligned on timestamp
    all_prices_df = pd.DataFrame(prices_by_node)

    # Overlay every hub's series on a single chart
    fig = go.Figure()
    for node, node_prices in all_prices_df.items():
        fig.add_trace(go.Scatter(
            x=all_prices_df.index,
            y=node_prices,
            mode='lines',
            name=node,
        ))

    fig.update_layout(
        title='ERCOT Real-Time Prices Across Nodes',
        xaxis_title='Date',
        yaxis_title='Price ($/MWh)',
        height=600,
        width=1000,
        template='plotly_white',
        legend={'orientation': 'h', 'yanchor': 'bottom', 'y': 1.02, 'xanchor': 'right', 'x': 1},
    )
    fig.show()

    # Spread = highest minus lowest hub price at each timestamp. Both extremes are
    # computed before the new column is attached, so it does not feed into itself.
    highest_price = all_prices_df.max(axis=1)
    lowest_price = all_prices_df.min(axis=1)
    all_prices_df['price_spread'] = highest_price - lowest_price
    spread = all_prices_df['price_spread']

    print("\nPrice Spread Statistics:")
    print(f"Average spread: ${spread.mean():.2f} per MWh")
    print(f"Maximum spread: ${spread.max():.2f} per MWh")
    print(f"Minimum spread: ${spread.min():.2f} per MWh")

## System-Wide Data Analysis

Let's analyze system-wide data to understand the overall grid conditions.

In [None]:
# Fetch system-wide data.
# Only the fetch itself is guarded: a failure while summarising or plotting should surface
# as a real traceback rather than being reported as a fetch error. On a failed fetch
# `system_data` is deliberately left undefined, because the next section tests for its existence.
try:
    system_data = ercot_gridstatus.fetch_system_wide_data(
        start_date=start_date,
        end_date=end_date
    )
except Exception as e:
    print(f"Error fetching system-wide data: {e}")
else:
    print("Successfully fetched system-wide data")
    print(f"Shape: {system_data.shape}")
    print(f"Date range: {system_data.index.min()} to {system_data.index.max()}")
    # map(str, ...) keeps the join safe if any column label is not a string
    print(f"Available metrics: {', '.join(map(str, system_data.columns))}")

    # Display the first few rows. A bare `.head()` inside a block is never rendered,
    # so use display(), which IPython provides as a built-in in notebook kernels.
    display(system_data.head())

    # Plot every column whose name mentions "load" (case-insensitive), if any
    load_columns = [col for col in system_data.columns if 'load' in str(col).lower()]

    if load_columns:
        fig = go.Figure()

        for col in load_columns:
            fig.add_trace(go.Scatter(
                x=system_data.index,
                y=system_data[col],
                mode='lines',
                name=str(col)
            ))

        fig.update_layout(
            title='ERCOT System Load',
            xaxis_title='Date',
            yaxis_title='Load (MW)',
            height=600,
            width=1000,
            template='plotly_white',
            legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1)
        )

        fig.show()
    else:
        print("No load columns found in system-wide data; skipping load plot")

## Price vs. Load Analysis

Let's analyze the relationship between system load and prices.

In [None]:
# Relate prices to system load: correlation, scatter with trend line, and a dual-axis time series.
# `system_data` is left undefined when its fetch failed, hence the existence check.
# (At cell top level, globals() and locals() are the same namespace.)
if 'real_price_data' in globals() and 'system_data' in globals():
    # Use the first column whose name mentions "load" (case-insensitive)
    load_column = next((col for col in system_data.columns if 'load' in str(col).lower()), None)

    if load_column:
        # Keep only timestamps present in both frames
        common_index = real_price_data.index.intersection(system_data.index)

        # Create a dataframe with price and load data
        price_load_df = pd.DataFrame({
            'price': real_price_data.loc[common_index, 'price'],
            'load': system_data.loc[common_index, load_column]
        })

        # Rows where both values are present; np.polyfit cannot handle NaN input
        paired_df = price_load_df.dropna()

        if paired_df.empty:
            print("No overlapping price and load observations available for analysis")
        else:
            # Calculate correlation (DataFrame.corr already ignores incomplete pairs)
            correlation = price_load_df.corr().iloc[0, 1]
            print(f"Correlation between price and load: {correlation:.4f}")

            # Create a scatter plot of price vs. load
            fig = go.Figure()

            fig.add_trace(go.Scatter(
                x=paired_df['load'],
                y=paired_df['price'],
                mode='markers',
                name='Observations',
                marker=dict(
                    size=8,
                    color=paired_df.index.hour,  # Color by hour of day
                    colorscale='Viridis',
                    colorbar=dict(title='Hour of Day')
                )
            ))

            # Add a least-squares trend line. A straight-line fit needs at least two
            # points with distinct load values; otherwise it is ill-conditioned.
            if len(paired_df) > 1 and paired_df['load'].nunique() > 1:
                slope_intercept = np.polyfit(paired_df['load'], paired_df['price'], 1)
                trend = np.poly1d(slope_intercept)

                x_range = np.linspace(paired_df['load'].min(), paired_df['load'].max(), 100)
                fig.add_trace(go.Scatter(
                    x=x_range,
                    y=trend(x_range),
                    mode='lines',
                    name=f'Trend Line (r={correlation:.2f})',
                    line=dict(color='red', dash='dash')
                ))

            fig.update_layout(
                title='ERCOT Price vs. System Load',
                xaxis_title='System Load (MW)',
                yaxis_title='Price ($/MWh)',
                height=600,
                width=1000,
                template='plotly_white'
            )

            fig.show()

            # Plot price and load on dual y-axes. The unfiltered frame is used here so
            # gaps in either series stay visible in the time series.
            fig2 = go.Figure()

            # Add price trace
            fig2.add_trace(go.Scatter(
                x=price_load_df.index,
                y=price_load_df['price'],
                name='Price ($/MWh)',
                mode='lines',
                line=dict(color='blue', width=2)
            ))

            # Add load trace on secondary y-axis
            fig2.add_trace(go.Scatter(
                x=price_load_df.index,
                y=price_load_df['load'],
                name='Load (MW)',
                mode='lines',
                line=dict(color='orange', width=2),
                yaxis='y2'
            ))

            # Update layout with dual y-axes.
            # The axis title colour is set through title.font: the older `titlefont`
            # shortcut is deprecated and rejected by recent Plotly releases.
            fig2.update_layout(
                title='ERCOT Price and System Load Over Time',
                xaxis_title='Date',
                yaxis=dict(
                    title=dict(text='Price ($/MWh)', font=dict(color='blue')),
                    tickfont=dict(color='blue')
                ),
                yaxis2=dict(
                    title=dict(text='Load (MW)', font=dict(color='orange')),
                    tickfont=dict(color='orange'),
                    anchor='x',
                    overlaying='y',
                    side='right'
                ),
                height=600,
                width=1000,
                template='plotly_white',
                legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1)
            )

            fig2.show()
    else:
        print("No load data available in system data")
else:
    print("Price or system data not available for analysis")

## Conclusion

In this notebook, we've demonstrated how to use the gridstatus library integration to fetch real ERCOT data for price forecasting. The integration allows us to:

1. Fetch real-time and day-ahead price data for various ERCOT nodes
2. Access weather data and other system-wide metrics
3. Analyze the relationship between prices, weather, and system load
4. Visualize the data to gain insights

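This integration enhances our forecasting capabilities by providing access to real historical data rather than relying solely on synthetic data generation. Note that the price and weather cells fall back to synthetic data when a fetch fails, so check each cell's output to confirm which source was actually used.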