# CAISO Electricity Demand Forecasting with LSTM

In [None]:
# Import required packages
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

## Data Loading

In [None]:
# Load dataset
df = pd.read_csv("/content/drive/MyDrive/caiso-electricity.csv")

# Prepare timestamp
df['timestamp'] = pd.to_datetime(df['UTC Timestamp (Interval Ending)'], errors='coerce')
df.set_index('timestamp', inplace=True)

# Display sample data
df.head()

## Model Definition

In [None]:
class LMP_LSTM_Multistep(nn.Module):
    """LSTM for multi-step electricity price forecasting"""
    def __init__(self, input_size=1, hidden_size=64, num_layers=2, pred_len=16):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, pred_len)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

## Data Preparation

In [8]:
def create_multistep_sequences(data, seq_length, pred_length):
    """Create input-output sequences for training"""
    xs, ys = [], []
    for i in range(len(data) - seq_length - pred_length):
        x = data[i:i+seq_length]
        y = data[i+seq_length:i+seq_length+pred_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

# Parameters
zones = ['SP-15 LMP', 'NP-15 LMP', 'ZP26 LMP', 'PGE-TDLMP', 'SCE-TDLMP']
SEQ_LEN = 24   # 6 hours of history (15-min intervals)
PRED_LEN = 16  # 4 hours forecast
EPOCHS = 30

## Training Pipeline

In [None]:
all_results = []

for zone in zones:
    if zone not in df.columns:
        print(f"Skipping {zone} - not found")
        continue

    print(f"\nProcessing {zone}")

    # 1. Prepare data
    zone_data = df[[zone]].dropna()
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(zone_data)

    # 2. Create sequences
    X, y = create_multistep_sequences(data_scaled, SEQ_LEN, PRED_LEN)
    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y.squeeze(), dtype=torch.float32)

    # 3. Create DataLoader
    dataset = TensorDataset(X_tensor, y_tensor)
    train_loader = DataLoader(dataset, batch_size=64, shuffle=True)

    # 4. Train model
    model = LMP_LSTM_Multistep(pred_len=PRED_LEN)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(EPOCHS):
        for xb, yb in train_loader:
            optimizer.zero_grad()
            pred = model(xb)
            loss = criterion(pred, yb)
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {loss.item():.4f}")

    # 5. Make predictions
    model.eval()
    with torch.no_grad():
        input_seq = torch.tensor(data_scaled[-SEQ_LEN:], dtype=torch.float32).unsqueeze(0)
        pred_scaled = model(input_seq).numpy().flatten()
        pred_actual = scaler.inverse_transform(pred_scaled.reshape(-1, 1)).flatten()

    # 6. Store results
    future_times = pd.date_range(start=zone_data.index[-1], periods=PRED_LEN + 1, freq='15T')[1:]
    actual_vals = zone_data[-PRED_LEN:].values.flatten()

    result_df = pd.DataFrame({
        "Zone": [zone] * len(future_times),
        "Timestamp": future_times,
        "Actual LMP": actual_vals,
        "Predicted LMP": pred_actual,
        "Error": np.abs(pred_actual - actual_vals)
    })
    all_results.append(result_df)

# Combine results
final_results = pd.concat(all_results, ignore_index=True)
final_results.head()


Processing SP-15 LMP
Epoch 1/30, Loss: 0.0393
Epoch 2/30, Loss: 0.0001
Epoch 3/30, Loss: 0.0001
Epoch 4/30, Loss: 0.0001
Epoch 5/30, Loss: 0.0001
Epoch 6/30, Loss: 0.0030
Epoch 7/30, Loss: 0.0002
Epoch 8/30, Loss: 0.0002
Epoch 9/30, Loss: 0.0001
Epoch 10/30, Loss: 0.0001
Epoch 11/30, Loss: 0.0001
Epoch 12/30, Loss: 0.0000
Epoch 13/30, Loss: 0.0370
Epoch 14/30, Loss: 0.0002
Epoch 15/30, Loss: 0.0001
Epoch 16/30, Loss: 0.0002
Epoch 17/30, Loss: 0.0001
Epoch 18/30, Loss: 0.0088
Epoch 19/30, Loss: 0.0000
Epoch 20/30, Loss: 0.0000
Epoch 21/30, Loss: 0.0003
Epoch 22/30, Loss: 0.0494
Epoch 23/30, Loss: 0.0001
Epoch 24/30, Loss: 0.0020
Epoch 25/30, Loss: 0.0003
Epoch 26/30, Loss: 0.0002
Epoch 27/30, Loss: 0.0002
Epoch 28/30, Loss: 0.0085
Epoch 29/30, Loss: 0.0010
Epoch 30/30, Loss: 0.0019

Processing NP-15 LMP


  future_times = pd.date_range(start=zone_data.index[-1], periods=PRED_LEN + 1, freq='15T')[1:]


Epoch 1/30, Loss: 0.0039
Epoch 2/30, Loss: 0.0002
Epoch 3/30, Loss: 0.0001
Epoch 4/30, Loss: 0.0004
Epoch 5/30, Loss: 0.0004
Epoch 6/30, Loss: 0.0016
Epoch 7/30, Loss: 0.0003
Epoch 8/30, Loss: 0.0003
Epoch 9/30, Loss: 0.0002
Epoch 10/30, Loss: 0.0002
Epoch 11/30, Loss: 0.0009
Epoch 12/30, Loss: 0.0005
Epoch 13/30, Loss: 0.0006
Epoch 14/30, Loss: 0.0001
Epoch 15/30, Loss: 0.0001
Epoch 16/30, Loss: 0.0002
Epoch 17/30, Loss: 0.0001
Epoch 18/30, Loss: 0.0001
Epoch 19/30, Loss: 0.0036
Epoch 20/30, Loss: 0.0002
Epoch 21/30, Loss: 0.0002
Epoch 22/30, Loss: 0.0001
Epoch 23/30, Loss: 0.0001
Epoch 24/30, Loss: 0.0006


## Visualization

In [None]:
import matplotlib.pyplot as plt

def plot_predictions(zone_name, results_df=final_results):
    """Plot actual vs predicted values"""
    zone_data = results_df[results_df['Zone'] == zone_name]

    plt.figure(figsize=(12, 5))
    plt.plot(zone_data['Timestamp'], zone_data['Actual LMP'], 'b-', label='Actual')
    plt.plot(zone_data['Timestamp'], zone_data['Predicted LMP'], 'r--', label='Predicted')
    plt.title(f'{zone_name} Price Forecast')
    plt.xlabel('Time')
    plt.ylabel('Price ($)')
    plt.legend()
    plt.grid()
    plt.show()

# Example plot
plot_predictions(zones[0])