# ERCOT Energy Market Intelligence – Google Colab

**Real-time demand forecasting & anomaly detection for the Texas ERCOT grid**

Run this notebook on [Google Colab](https://colab.research.google.com) to use a free GPU runtime.

1. Clone the repo (set `REPO_URL` in the setup cell to your repository first)
2. Install dependencies
3. Add your EIA API key (get one at https://www.eia.gov/opendata/)
4. Run all cells

In [None]:
# Run this first: Setup (clone if needed)
import os

REPO_URL = "https://github.com/YOUR_USERNAME/ercot-market-intelligence.git"  # ← Update with your repo

if not os.path.exists("models"):
    !git clone {REPO_URL} ercot-repo
    os.chdir("ercot-repo")

!pip install -q torch pandas numpy scikit-learn xgboost requests matplotlib seaborn
print("Setup complete. CWD:", os.getcwd())

In [None]:
import sys
sys.path.insert(0, '.')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error
from sklearn.ensemble import IsolationForest
import xgboost as xgb

from models.data_pipeline import (
    fetch_grid_load,
    fetch_weather,
    fetch_forecast,
    engineer_features,
    prepare_gnn_dataset,
)
from models.gnn_forecaster import train_gnn_forecaster, predict_gnn
from models.gnn_anomaly import train_gnn_ae, detect_anomalies_gnn

# EIA API key: paste here, or use Colab Secrets (Keychain icon → Add EIA_API_KEY)
# Resolution order: Colab secret `EIA_API_KEY` first, then the value pasted below.
EIA_API_KEY_FALLBACK = "YOUR_EIA_API_KEY"  # ← Replace with your key from https://www.eia.gov/opendata/

try:
    from google.colab import userdata
except ImportError:
    # Not running on Colab: there is no secrets store, use the pasted value.
    EIA_API_KEY = EIA_API_KEY_FALLBACK
else:
    try:
        # An empty/None secret is treated as "not set" and falls back as well.
        EIA_API_KEY = userdata.get('EIA_API_KEY') or EIA_API_KEY_FALLBACK
    except Exception as exc:
        # Best-effort lookup: report why the secret could not be read instead of
        # silently swallowing the error, then fall back to the pasted value.
        print(f"Could not read Colab secret EIA_API_KEY ({type(exc).__name__}: {exc}); using the pasted key.")
        EIA_API_KEY = EIA_API_KEY_FALLBACK

# Surface the most common setup mistake here rather than as a failed API call later.
if EIA_API_KEY == "YOUR_EIA_API_KEY":
    print("WARNING: EIA_API_KEY is still the placeholder - add a Colab secret or paste your key above.")

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*'; older
# releases only ship 'seaborn'. Use whichever exists so the cell does not fail
# on an older install, and fall back to the default style if neither is present.
plot_style = next(
    (name for name in ('seaborn-v0_8', 'seaborn') if name in plt.style.available),
    'default',
)
plt.style.use(plot_style)
print("Environment ready.")

## 1. Fetch Real ERCOT + Weather Data

In [None]:
# Load grid demand, fetch weather for the matching window, join them (plus the
# forecast when available) and build the modelling frame `df_model`.
print("Fetching ERCOT grid load...")
df_grid = fetch_grid_load(EIA_API_KEY)
if df_grid.empty:
    # On an empty frame index.min() is NaT and strftime fails with an unrelated
    # error; raise something that points at the likely cause instead.
    raise ValueError("fetch_grid_load returned no rows - check EIA_API_KEY and network access.")

# Weather window: from the first load timestamp to 5 days before the last one.
# NOTE(review): the 5-day margin presumably covers a lag in the weather source - confirm.
start = df_grid.index.min().strftime('%Y-%m-%d')
end = (df_grid.index.max() - pd.Timedelta(days=5)).strftime('%Y-%m-%d')
print(f"Fetching weather for {start} to {end}...")
df_weather = fetch_weather(start=start, end=end)

# Inner joins keep only timestamps present in every source.
df_raw = df_grid.join(df_weather, how='inner')
df_forecast = fetch_forecast(EIA_API_KEY)
if not df_forecast.empty:
    df_raw = df_raw.join(df_forecast, how='inner')

df_model = engineer_features(df_raw)
if len(df_model) == 0:
    # Stop here rather than reporting "Data ready: 0 rows" and failing in a later cell.
    raise ValueError(
        "No rows left after joining load/weather/forecast and engineering features - "
        "check that the sources overlap in time and share the same index."
    )
print(f"Data ready: {len(df_model)} rows")

## 2. Prepare GNN Dataset

In [None]:
# Build the train/test arrays, feature list and adjacency used by the GNN cells.
gnn_dataset_kwargs = dict(
    target_col='load_mw',
    seq_len=24,
    horizon=1,
    train_ratio=0.85,
)
(
    X_train,
    y_train,
    X_test,
    y_test,
    feature_names,
    adj,
) = prepare_gnn_dataset(df_model, **gnn_dataset_kwargs)

print(f"Train: {X_train.shape}, Test: {X_test.shape}")
print(f"Features: {feature_names}")

## 3. XGBoost Baseline

In [None]:
# XGBoost baseline on tabular features; only columns actually present are used.
features = [f for f in ['hour', 'day_of_week', 'is_weekend', 'temperature_2m', 'load_lag_1h', 'load_lag_24h'] if f in df_model.columns]

# Positional 85/15 split (same ratio as the GNN dataset cell).
# NOTE(review): assumes df_model rows are in time order - confirm.
split = int(len(df_model) * 0.85)
train_df, test_df = df_model.iloc[:split], df_model.iloc[split:]

# Early stopping needs a monitoring set. Use the tail of the training window for it:
# monitoring the test set would let test rows pick the number of trees and make the
# reported MAPE optimistic.
val_split = int(len(train_df) * 0.9)
fit_df, val_df = train_df.iloc[:val_split], train_df.iloc[val_split:]

xgb_model = xgb.XGBRegressor(n_estimators=200, learning_rate=0.05, max_depth=6, early_stopping_rounds=20)
xgb_model.fit(
    fit_df[features], fit_df['load_mw'],
    eval_set=[(val_df[features], val_df['load_mw'])],
    verbose=False,
)

# The test window is touched only here, for the final score.
xgb_pred = xgb_model.predict(test_df[features])
xgb_mape = mean_absolute_percentage_error(test_df['load_mw'], xgb_pred)
print(f"XGBoost MAPE: {xgb_mape:.2%}")

## 4. GNN Forecaster

In [None]:
# Train the GNN forecaster, predict on the test windows and score with MAPE.
gnn_training_output = train_gnn_forecaster(
    X_train,
    y_train,
    adj,
    epochs=20,
    batch_size=128,
)
gnn_model, gnn_losses, scale_params = gnn_training_output

# scale_params from training are handed back to the predictor.
gnn_pred = predict_gnn(gnn_model, X_test, scale_params=scale_params)

gnn_mape = mean_absolute_percentage_error(y_test, gnn_pred)
print(f"GNN MAPE: {gnn_mape:.2%}")

In [None]:
# Overlay actual load with both forecasts for the first (up to) 168 test points.
# NOTE(review): xgb_pred comes from the tabular split and y_test from the GNN
# dataset builder; they may not cover the same timestamps - confirm alignment.
n_show = min(168, len(y_test))

plt.figure(figsize=(12, 5))
curves = [
    (y_test, dict(label='Actual', linewidth=2)),
    (gnn_pred, dict(label='GNN', linestyle='--')),
    (xgb_pred, dict(label='XGBoost', linestyle='--', alpha=0.8)),
]
for values, line_style in curves:
    plt.plot(values[:n_show], **line_style)

plt.title('ERCOT Load Forecast')
plt.ylabel('Load (MW)')
plt.legend()
plt.show()

## 5. Anomaly Detection

In [None]:
# --- Isolation Forest on whichever of the candidate columns exist ---
candidate_columns = ('load_mw', 'temperature_2m', 'grid_stress')
anomaly_features = [col for col in candidate_columns if col in df_model.columns]

iso = IsolationForest(contamination=0.02, random_state=42)
# fit_predict labels outliers as -1; the labels are stored on df_model itself.
iso_labels = iso.fit_predict(df_model[anomaly_features])
df_model['iso_anomaly'] = iso_labels
iso_count = df_model['iso_anomaly'].eq(-1).sum()

# --- GNN autoencoder: train on the training windows, flag test windows ---
ae_model, _ = train_gnn_ae(
    X_train,
    adj,
    epochs=15,
    batch_size=128,
)
# Only the second return value (the anomaly mask) is used here.
_, mask, _ = detect_anomalies_gnn(
    ae_model,
    X_test,
    threshold_percentile=98,
)
gnn_anomalies = mask.sum()

print(f"Isolation Forest: {iso_count} anomalies")
print(f"GNN AutoEncoder: {gnn_anomalies} test anomalies")

In [None]:
# Final recap of the forecast errors and anomaly counts computed above.
rule = "=" * 50
summary_lines = [
    rule,
    "SUMMARY",
    rule,
    f"XGBoost MAPE: {xgb_mape:.2%}",
    f"GNN MAPE:     {gnn_mape:.2%}",
    f"Anomalies: IF={iso_count}, GNN={gnn_anomalies}",
    rule,
]
print("\n".join(summary_lines))