# LSTM Multi-Step Load Forecasting with PyTorch

From the [Sisyphean Gridworks ML Playground](https://sgridworks.com/ml-playground/guides/10-advanced-load-forecasting.html)

## Setup

Clone the repository and install dependencies. Run this cell first.

In [None]:
!git clone https://github.com/SGridworks/Dynamic-Network-Model.git 2>/dev/null || echo 'Already cloned'
%cd Dynamic-Network-Model
!pip install -q pandas numpy matplotlib seaborn scikit-learn xgboost lightgbm pyarrow

## Load and Prepare the Data

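We start with the same SP&L load profile and weather data from Guide 02, but this time we will keep the series intact as an ordered sequence rather than flattening it into tabular features. Note that the weather data is hourly, so after the merge below only the load rows with a matching weather timestamp are kept; this is why the rest of this guide counts timesteps in hours.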

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from demo_data.load_demo_data import load_load_profiles, load_weather_data

# Load 15-minute feeder load profiles
load = load_load_profiles()

# Load hourly weather
weather = load_weather_data()

# Focus on Feeder FDR-0001 (same as Guide 02)
feeder = load[load["feeder_id"] == "FDR-0001"].copy()
# Normalize the join key to datetime on both sides so the merge compares
# like with like (a no-op when the column is already datetime).
feeder["timestamp"] = pd.to_datetime(feeder["timestamp"])
feeder = feeder.sort_values("timestamp").reset_index(drop=True)

# Merge weather data (weather is hourly; load is 15-min)
weather["timestamp"] = pd.to_datetime(weather["timestamp"])
df = feeder.merge(
    weather[["timestamp", "temperature", "humidity", "wind_speed"]],
    on="timestamp", how="left"
)
# Rows without a matching weather timestamp have NaN temperature and are
# dropped here, so `df` ends up with one row per weather observation.
# The window sizes used later (168 / 24 rows) rely on that hourly spacing.
df = df.dropna(subset=["temperature"]).reset_index(drop=True)

print(f"Total hourly records (after weather merge): {len(df):,}")
print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
print(f"Columns: {list(df.columns)}")

## Explore Temporal Patterns

Before building any model, it is critical to understand the patterns your LSTM needs to capture. Electricity load exhibits strong daily, weekly, and seasonal cycles that a well-trained LSTM should learn to reproduce.

In [None]:
# Calendar features derived from the timestamp column
ts_accessor = df["timestamp"].dt
df["hour"] = ts_accessor.hour
df["day_of_week"] = ts_accessor.dayofweek
df["month"] = ts_accessor.month

# 2x2 grid: daily / weekly on the top row, seasonal / temperature below
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
ax_daily, ax_weekly = axes[0, 0], axes[0, 1]
ax_seasonal, ax_scatter = axes[1, 0], axes[1, 1]

# Top-left: mean load for each hour of the day
hourly_mean = df.groupby("hour")["load_mw"].mean()
hourly_mean.plot(ax=ax_daily, color="#2D6A7A", linewidth=2)
ax_daily.set_title("Daily Cycle: Average Load by Hour")
ax_daily.set_ylabel("Load (MW)")

# Top-right: mean load for each day of the week
day_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
weekly = df.groupby("day_of_week")["load_mw"].mean()
ax_weekly.bar(day_names, weekly.values, color="#5FCCDB")
ax_weekly.set_title("Weekly Cycle: Average Load by Day")
ax_weekly.set_ylabel("Load (MW)")

# Bottom-left: mean load for each calendar month
monthly_mean = df.groupby("month")["load_mw"].mean()
monthly_mean.plot(kind="bar", ax=ax_seasonal, color="#2D6A7A")
ax_seasonal.set_title("Seasonal Cycle: Average Load by Month")
ax_seasonal.set_ylabel("Load (MW)")

# Bottom-right: every record as a faint point, temperature against load
ax_scatter.scatter(
    df["temperature"],
    df["load_mw"],
    alpha=0.02,
    s=1,
    color="#2D6A7A",
)
ax_scatter.set_title("Temperature vs Load (U-shaped)")
ax_scatter.set_xlabel("Temperature (°F)")
ax_scatter.set_ylabel("Load (MW)")

plt.tight_layout()
plt.show()

## Create Sliding Window Sequences

LSTMs consume fixed-length sequences. We use a sliding window: given the past 168 hours (1 week) of load data, predict the next 24 hours of load. Each window slides forward by one hour to create overlapping training samples.

In [None]:
# Window sizes, expressed in rows of the merged series
INPUT_HOURS = 168   # 7 days of history as input
OUTPUT_HOURS = 24   # predict 24 hours ahead

# Target series as a single-column 2-D array, the layout StandardScaler expects
load_series = df["load_mw"].values.reshape(-1, 1)

# IMPORTANT: the scaler statistics come from the first 70% of rows only,
# so nothing from later rows influences the normalization (no data leakage)
train_end_idx = int(len(df) * 0.7)
scaler_load = StandardScaler().fit(load_series[:train_end_idx])

# Apply the training-set statistics to the whole series, then go back to 1-D
load_scaled = scaler_load.transform(load_series).flatten()

print(f"Load mean: {scaler_load.mean_[0]:.3f} MW")
print(f"Load std:  {scaler_load.scale_[0]:.3f} MW")

# Build sliding window sequences
def create_sequences(data, input_len, output_len):
    """Slice a series into overlapping (history, future) window pairs.

    Window ``s`` uses ``data[s : s + input_len]`` as input and the
    ``output_len`` values immediately after it as the target. Consecutive
    windows start one element apart.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays built from the input windows and
        the target windows respectively. Both are empty when ``data`` is
        shorter than ``input_len + output_len``.
    """
    window_count = len(data) - input_len - output_len + 1
    starts = range(window_count)  # empty when window_count <= 0

    inputs = [data[s : s + input_len] for s in starts]
    targets = [
        data[s + input_len : s + input_len + output_len] for s in starts
    ]
    return np.array(inputs), np.array(targets)

# Window the scaled load series into (history, future) pairs
X_all, y_all = create_sequences(
    data=load_scaled,
    input_len=INPUT_HOURS,
    output_len=OUTPUT_HOURS,
)

# Report how many windows were produced and their array shapes
print(
    f"\nTotal sequences: {len(X_all):,}",
    f"Input shape:  {X_all.shape}  (samples, 168 timesteps)",
    f"Output shape: {y_all.shape}  (samples, 24 timesteps)",
    sep="\n",
)

## Chronological Train/Validation/Test Split

Time-series data must be split chronologically. We use the first 70% of sequences for training, the next 15% for validation (hyperparameter tuning), and the final 15% for testing. There is no shuffling across the split boundaries: batches are shuffled only within the training set, so the model never trains on sequences from the validation or test portions.

In [None]:
# Cut points for a 70% / 15% / 15% split taken in index order (no shuffling)
n = len(X_all)
train_end, val_end = int(n * 0.70), int(n * 0.85)

# Inputs and targets are sliced with the same boundaries so they stay paired
X_train, X_val, X_test = (
    X_all[:train_end],
    X_all[train_end:val_end],
    X_all[val_end:],
)
y_train, y_val, y_test = (
    y_all[:train_end],
    y_all[train_end:val_end],
    y_all[val_end:],
)

print(f"Train: {len(X_train):,} sequences (first 70%)")
print(f"Val:   {len(X_val):,} sequences (next 15%)")
print(f"Test:  {len(X_test):,} sequences (final 15%)")

## Build the LSTM Model in PyTorch

Now we define the LSTM architecture. The model processes the 168-hour input sequence one timestep at a time, building up a hidden state that summarizes the history. The final hidden state is then passed through fully connected layers to produce the 24-hour forecast.

In [None]:
# PyTorch Dataset for batching
class LoadDataset(Dataset):
    """Pairs of (input window, target window) held as float32 tensors.

    The inputs receive a trailing feature axis of size 1, so a 2-D array of
    windows becomes (samples, timesteps, 1).
    """

    def __init__(self, X, y):
        inputs = torch.FloatTensor(X)
        # Append the single-feature axis expected by a batch_first LSTM
        self.X = inputs[..., None]
        self.y = torch.FloatTensor(y)

    def __len__(self):
        # Number of samples = size of the leading dimension
        return self.X.shape[0]

    def __getitem__(self, idx):
        sample_input = self.X[idx]
        sample_target = self.y[idx]
        return sample_input, sample_target

# Wrap each split in a Dataset, then in a DataLoader
BATCH_SIZE = 64
train_ds, val_ds, test_ds = (
    LoadDataset(X_train, y_train),
    LoadDataset(X_val, y_val),
    LoadDataset(X_test, y_test),
)

# Only the training loader shuffles; validation and test keep index order
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader, test_loader = (
    DataLoader(split_ds, batch_size=BATCH_SIZE)
    for split_ds in (val_ds, test_ds)
)

## Train the LSTM

We train using MSE loss with the Adam optimizer and an exponential learning rate scheduler. Early stopping on validation loss prevents overfitting.

In [None]:
# Define the LSTM model
class LoadForecaster(nn.Module):
    """LSTM encoder followed by a small MLP head for multi-step forecasts.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Width of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per sample (forecast length).
        dropout: Dropout probability used between stacked LSTM layers and
            inside the fully connected head.
    """

    def __init__(self, input_size=1, hidden_size=128,
                 num_layers=2, output_size=24, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # LSTM layers: process the input sequence.
        # nn.LSTM applies dropout only *between* stacked layers, so with a
        # single layer a non-zero value has no effect and triggers a warning;
        # pass 0.0 in that case.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0
        )

        # Fully connected layers: map hidden state to forecast
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, output_size)
        )

    def forward(self, x):
        """Return a (batch, output_size) forecast for x of shape
        (batch, timesteps, input_size)."""
        # lstm_out shape: (batch, timesteps, hidden_size); the returned
        # (h_n, c_n) state tuple is not needed here.
        lstm_out, _ = self.lstm(x)

        # Top-layer output at the final timestep summarizes the sequence
        last_hidden = lstm_out[:, -1, :]  # (batch, hidden_size)
        return self.fc(last_hidden)       # (batch, output_size)

# Pick the GPU when one is available, otherwise stay on the CPU
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")

# Build the forecaster with its default hyperparameters and move it over
model = LoadForecaster()
model = model.to(device)

param_count = sum(p.numel() for p in model.parameters())
print(f"Device: {device}")
print(f"Model parameters: {param_count:,}")
print(model)

## Multi-Step Evaluation: MAPE, RMSE, and Visualization

Now we evaluate the trained LSTM on the held-out test set (final 15%). Since the model outputs 24 values at once, we can assess accuracy at each forecast horizon (1 hour ahead, 2 hours ahead, ... 24 hours ahead).

In [None]:
# Training configuration
EPOCHS = 30
LR = 1e-3
PATIENCE = 5  # early stopping patience

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
# Learning rate is multiplied by 0.95 after every epoch
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

# Training loop with early stopping
train_losses, val_losses = [], []
best_val_loss = float("inf")
patience_counter = 0

for epoch in range(EPOCHS):
    # --- Training ---
    model.train()
    epoch_train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        predictions = model(X_batch)
        loss = criterion(predictions, y_batch)
        loss.backward()
        # Cap the gradient norm to keep LSTM updates stable
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        # Weight by batch size so the epoch average is per-sample
        epoch_train_loss += loss.item() * len(X_batch)

    avg_train = epoch_train_loss / len(train_ds)

    # --- Validation ---
    model.eval()
    epoch_val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            epoch_val_loss += loss.item() * len(X_batch)

    avg_val = epoch_val_loss / len(val_ds)

    train_losses.append(avg_train)
    val_losses.append(avg_val)
    scheduler.step()

    print(f"Epoch {epoch+1:2d}/{EPOCHS} | "
          f"Train Loss: {avg_train:.6f} | Val Loss: {avg_val:.6f}")

    # Early stopping: checkpoint on every improvement, otherwise count the
    # epochs without improvement and stop once PATIENCE is reached.
    if avg_val < best_val_loss:
        best_val_loss = avg_val
        torch.save(model.state_dict(), "best_lstm_load.pt")
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= PATIENCE:
            print(f"Early stopping at epoch {epoch+1}")
            break

# Load best model (map_location keeps the tensors on the active device)
model.load_state_dict(torch.load("best_lstm_load.pt", map_location=device))
print(f"\nBest validation loss: {best_val_loss:.6f}")

## Add Weather Features as Exogenous Inputs

So far our LSTM only sees historical load. But temperature is the single biggest driver of demand. Let's add weather data as additional input features alongside load. This transforms the LSTM from a univariate to a multivariate model.

In [None]:
# Per-epoch training and validation loss on a single set of axes
fig, ax = plt.subplots(figsize=(10, 5))
for curve, curve_label in (
    (train_losses, "Train Loss"),
    (val_losses, "Validation Loss"),
):
    ax.plot(curve, label=curve_label, linewidth=2)

ax.set(
    xlabel="Epoch",
    ylabel="MSE Loss (normalized)",
    title="LSTM Training and Validation Loss",
)
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## Compare LSTM to Gradient Boosting Baseline

In Guide 02, you built a Gradient Boosting model that predicted one hour ahead with hand-crafted features. Let's rebuild that baseline and compare it to our LSTM models. Note that this is a “direct multi-output LSTM vs. single-step GB” comparison—the GB model produces one-step-ahead predictions using known lag features, while the LSTM forecasts all 24 hours simultaneously without iterative re-feeding. These represent different forecasting paradigms, and the MAPE numbers are not directly apples-to-apples. The GB number represents the best-case scenario for a one-step model; in an autoregressive 24-step rollout (where each prediction feeds into the next), the GB’s error would compound and grow substantially.

In [None]:
# Run the model over the test loader with gradients disabled
model.eval()
all_preds, all_actuals = [], []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        preds = model(X_batch).cpu().numpy()
        all_preds.append(preds)
        all_actuals.append(y_batch.numpy())

# Stack the per-batch arrays into (samples, OUTPUT_HOURS)
y_pred_scaled = np.concatenate(all_preds)
y_true_scaled = np.concatenate(all_actuals)

# Undo the standardization: the scaler works on a single column, so flatten
# to (-1, 1) for the inverse transform and restore the horizon axis after
pred_column = y_pred_scaled.reshape(-1, 1)
true_column = y_true_scaled.reshape(-1, 1)
y_pred_mw = scaler_load.inverse_transform(pred_column).reshape(-1, OUTPUT_HOURS)
y_true_mw = scaler_load.inverse_transform(true_column).reshape(-1, OUTPUT_HOURS)

# Metrics pooled across every sample and every horizon
flat_true = y_true_mw.flatten()
flat_pred = y_pred_mw.flatten()
mae = mean_absolute_error(flat_true, flat_pred)
rmse = np.sqrt(mean_squared_error(flat_true, flat_pred))
mape = np.mean(np.abs((flat_true - flat_pred) / flat_true)) * 100

print(f"Test Set Metrics (all 24 horizons):")
print(f"  MAE:  {mae:.4f} MW")
print(f"  RMSE: {rmse:.4f} MW")
print(f"  MAPE: {mape:.2f}%")

## Common Mistakes in Time-Series ML

Before wrapping up, let's review several common pitfalls that can quietly undermine your results. These mistakes are especially prevalent in time-series forecasting and are worth internalizing before moving to production.

In [None]:
# MAE computed separately for each forecast step (column of the arrays)
horizon_mae = [
    mean_absolute_error(y_true_mw[:, step], y_pred_mw[:, step])
    for step in range(OUTPUT_HOURS)
]

# Bar chart with one bar per hour-ahead horizon
fig, ax = plt.subplots(figsize=(10, 5))
horizon_ticks = range(1, 25)
ax.bar(horizon_ticks, horizon_mae, color="#5FCCDB")
ax.set(
    xlabel="Forecast Horizon (hours ahead)",
    ylabel="MAE (MW)",
    title="Forecast Error by Horizon: Accuracy Degrades Gracefully",
)
ax.set_xticks(horizon_ticks)
ax.grid(True, alpha=0.3, axis="y")
plt.tight_layout()
plt.show()

# Spot-check the first, middle and last horizons
print(f"\n1-hour ahead MAE:  {horizon_mae[0]:.4f} MW")
print(f"12-hour ahead MAE: {horizon_mae[11]:.4f} MW")
print(f"24-hour ahead MAE: {horizon_mae[23]:.4f} MW")

## Wrap-Up and Next Steps

You built a multi-step LSTM load forecasting system that predicts an entire 24-hour load curve in a single forward pass. Here's what you accomplished:

In [None]:
# Plot a sample 24-hour forecast vs. actual
# Pick a sample from the test set; clamp so a short test set cannot push
# the index out of range.
sample_idx = min(500, len(y_true_mw) - 1)

# horizon_mae is a Python list, and `2 * list` repeats the list instead of
# scaling it. Convert to an array so the band is computed element-wise.
band_halfwidth = 2 * np.asarray(horizon_mae)

fig, ax = plt.subplots(figsize=(12, 5))
hours = range(1, 25)
ax.plot(hours, y_true_mw[sample_idx], "o-",
        label="Actual", linewidth=2, color="#2D6A7A")
ax.plot(hours, y_pred_mw[sample_idx], "s--",
        label="LSTM Forecast", linewidth=2, color="#5FCCDB")
ax.fill_between(hours,
    y_pred_mw[sample_idx] - band_halfwidth,
    y_pred_mw[sample_idx] + band_halfwidth,
    alpha=0.15, color="#5FCCDB", label="±2 MAE band")
ax.set_xlabel("Hours Ahead")
ax.set_ylabel("Load (MW)")
ax.set_title("Sample 24-Hour Load Forecast vs. Actual")
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Input channels for the multivariate model; load stays in column 0
mv_columns = ["load_mw", "temperature", "humidity", "wind_speed"]
features = df[mv_columns].values

# Scaler statistics come from the training rows only
# (same boundary as the univariate scaler)
scaler_feat = StandardScaler().fit(features[:train_end_idx])
features_scaled = scaler_feat.transform(features)

# Build sequences with multivariate input, univariate output
def create_multivariate_sequences(features, targets, input_len, output_len):
    """Build sliding windows with multi-channel inputs and 1-D targets.

    features: (N, num_features) - all input channels
    targets:  (N,) - the series to forecast, aligned row-for-row with features

    Window ``s`` takes ``features[s : s + input_len]`` as input and the
    ``output_len`` entries of ``targets`` that follow it as the label.
    Returns the stacked inputs and labels as two NumPy arrays.
    """
    window_count = len(features) - input_len - output_len + 1
    starts = range(window_count)  # empty when the series is too short

    inputs = [features[s : s + input_len] for s in starts]
    labels = [
        targets[s + input_len : s + input_len + output_len] for s in starts
    ]
    return np.array(inputs), np.array(labels)

# The forecast target remains the load channel (column 0 of the scaled array)
target_scaled = features_scaled[:, 0]

X_mv, y_mv = create_multivariate_sequences(
    features=features_scaled,
    targets=target_scaled,
    input_len=INPUT_HOURS,
    output_len=OUTPUT_HOURS,
)

print(f"Multivariate input shape:  {X_mv.shape}")
print(f"  (samples, 168 timesteps, 4 features)")
print(f"Output shape: {y_mv.shape}")

# Reuse the chronological cut points computed for the univariate windows
X_train_mv, X_val_mv, X_test_mv = (
    X_mv[:train_end],
    X_mv[train_end:val_end],
    X_mv[val_end:],
)
y_train_mv, y_val_mv, y_test_mv = (
    y_mv[:train_end],
    y_mv[train_end:val_end],
    y_mv[val_end:],
)

In [None]:
# Updated Dataset for multivariate inputs
class MultiVarLoadDataset(Dataset):
    """(input window, target window) pairs for multi-feature inputs.

    Unlike the univariate dataset, no feature axis is added: the inputs are
    converted to float32 tensors with their shape left unchanged.
    """

    def __init__(self, X, y):
        # Inputs already carry their feature axis, so convert as-is
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)

    def __len__(self):
        # Number of samples = size of the leading dimension
        return self.X.shape[0]

    def __getitem__(self, idx):
        sample_input = self.X[idx]
        sample_target = self.y[idx]
        return sample_input, sample_target

# Create new DataLoaders
train_mv_loader = DataLoader(
    MultiVarLoadDataset(X_train_mv, y_train_mv),
    batch_size=BATCH_SIZE, shuffle=True)
val_mv_loader = DataLoader(
    MultiVarLoadDataset(X_val_mv, y_val_mv),
    batch_size=BATCH_SIZE)
test_mv_loader = DataLoader(
    MultiVarLoadDataset(X_test_mv, y_test_mv),
    batch_size=BATCH_SIZE)

# Train multivariate model (input_size=4 now)
model_mv = LoadForecaster(input_size=4, hidden_size=128,
                           num_layers=2, output_size=24).to(device)

optimizer_mv = torch.optim.Adam(model_mv.parameters(), lr=LR)
scheduler_mv = torch.optim.lr_scheduler.ExponentialLR(optimizer_mv, gamma=0.95)

best_val_mv = float("inf")
patience_counter = 0

for epoch in range(EPOCHS):
    # --- Training ---
    model_mv.train()
    epoch_loss = 0.0
    for X_b, y_b in train_mv_loader:
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer_mv.zero_grad()
        loss = criterion(model_mv(X_b), y_b)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model_mv.parameters(), 1.0)
        optimizer_mv.step()
        epoch_loss += loss.item() * len(X_b)

    # --- Validation ---
    model_mv.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_b, y_b in val_mv_loader:
            X_b, y_b = X_b.to(device), y_b.to(device)
            # Weight by batch size so the average below is per-sample
            val_loss += criterion(model_mv(X_b), y_b).item() * len(X_b)

    avg_val = val_loss / len(X_val_mv)
    scheduler_mv.step()

    # Early stopping: checkpoint on every improvement, otherwise count the
    # epochs without improvement and stop once PATIENCE is reached.
    if avg_val < best_val_mv:
        best_val_mv = avg_val
        torch.save(model_mv.state_dict(), "best_lstm_mv.pt")
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= PATIENCE:
            print(f"Early stopping at epoch {epoch+1}")
            break

    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1:2d} | Val Loss: {avg_val:.6f}")

# Restore the best checkpoint (map_location keeps tensors on the active device)
model_mv.load_state_dict(torch.load("best_lstm_mv.pt", map_location=device))

# Evaluate multivariate model
model_mv.eval()  # make sure dropout is disabled for inference
all_preds_mv = []
with torch.no_grad():
    for X_b, _ in test_mv_loader:
        preds = model_mv(X_b.to(device)).cpu().numpy()
        all_preds_mv.append(preds)

y_pred_mv_scaled = np.concatenate(all_preds_mv)

# Inverse-transform using only the load column's statistics
# (column 0 of scaler_feat corresponds to load_mw)
load_mean = scaler_feat.mean_[0]
load_std  = scaler_feat.scale_[0]
y_pred_mv_mw = y_pred_mv_scaled * load_std + load_mean
y_true_mv_mw = y_test_mv * load_std + load_mean
y_true_mv_mw = y_true_mv_mw.reshape(-1, OUTPUT_HOURS)

mae_mv  = mean_absolute_error(y_true_mv_mw.flatten(), y_pred_mv_mw.flatten())
rmse_mv = np.sqrt(mean_squared_error(y_true_mv_mw.flatten(), y_pred_mv_mw.flatten()))
mape_mv = np.mean(np.abs(
    (y_true_mv_mw.flatten() - y_pred_mv_mw.flatten()) / y_true_mv_mw.flatten()
)) * 100

print(f"\nMultivariate LSTM (load + weather):")
print(f"  MAE:  {mae_mv:.4f} MW")
print(f"  RMSE: {rmse_mv:.4f} MW")
print(f"  MAPE: {mape_mv:.2f}%")

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Rebuild the Guide 02 baseline for Feeder FDR-0001.
# This is a one-step model: each test row is predicted directly from its own
# lag features (no iterative 24-step rollout is performed here).
gb_df = df.copy()
gb_df["is_weekend"] = (gb_df["day_of_week"] >= 5).astype(int)
gb_df["load_lag_24h"]  = gb_df["load_mw"].shift(24)
gb_df["load_lag_168h"] = gb_df["load_mw"].shift(168)
# shift(1) before rolling so the window covers the 24 rows *before* the
# current one; without it the mean would include the target value itself.
gb_df["load_rolling_24h"] = gb_df["load_mw"].shift(1).rolling(24).mean()
# Drops the warm-up rows whose lag features are NaN (and any other NaN rows)
gb_df = gb_df.dropna()

feature_cols = ["hour", "day_of_week", "month", "is_weekend",
                "temperature", "humidity", "wind_speed",
                "load_lag_24h", "load_lag_168h", "load_rolling_24h"]

# Chronological split: first 85% of rows for training, final 15% for testing
gb_split = int(len(gb_df) * 0.85)
gb_train = gb_df.iloc[:gb_split]
gb_test  = gb_df.iloc[gb_split:]

gb_model = GradientBoostingRegressor(
    n_estimators=300, max_depth=5, learning_rate=0.1, random_state=42)
gb_model.fit(gb_train[feature_cols], gb_train["load_mw"])

# 1-step GB prediction on the test set
gb_pred = gb_model.predict(gb_test[feature_cols])
gb_mae = mean_absolute_error(gb_test["load_mw"], gb_pred)
gb_rmse = np.sqrt(mean_squared_error(gb_test["load_mw"], gb_pred))
gb_mape = np.mean(np.abs(
    (gb_test["load_mw"].values - gb_pred) / gb_test["load_mw"].values
)) * 100

# Comparison table: left-aligned model name, then MAE / RMSE / MAPE columns
print("=" * 55)
print("Model Comparison (Test Set: 2024)")
print("=" * 55)
print(f"{'Model':<28} {'MAE':>8} {'RMSE':>8} {'MAPE':>8}")
print("-" * 55)
print(f"{'GB (1-step, Guide 02)':<28} {gb_mae:>8.4f} {gb_rmse:>8.4f} {gb_mape:>7.2f}%")
print(f"{'LSTM (univariate, 24-step)':<28} {mae:>8.4f} {rmse:>8.4f} {mape:>7.2f}%")
print(f"{'LSTM + weather (24-step)':<28} {mae_mv:>8.4f} {rmse_mv:>8.4f} {mape_mv:>7.2f}%")
print("=" * 55)

In [None]:
# Visual comparison: one week of forecasts
week_idx = slice(500, 668)  # 168 hours = 1 week

fig, ax = plt.subplots(figsize=(14, 6))

# Actual load
week_actual = gb_test.iloc[week_idx]
week_times = week_actual["timestamp"].values
ax.plot(week_times,
        week_actual["load_mw"].values,
        label="Actual", linewidth=2, color="#1a202c")

# GB predictions
ax.plot(week_times,
        gb_pred[week_idx],
        label="Gradient Boosting (Guide 02)", linewidth=1.5,
        linestyle="--", color="#D69E2E")

# LSTM + weather: use the 1-hour-ahead prediction from each sequence.
# The LSTM and GB test sets start at different rows of `df`, so positional
# index 500 is a different timestamp in each. Test sequence j forecasts
# df row (val_end + j + INPUT_HOURS) as its first step, so recover the
# LSTM timestamps from that and select the ones inside the plotted week.
first_target_row = val_end + INPUT_HOURS
lstm_times = df["timestamp"].values[
    first_target_row : first_target_row + len(y_pred_mv_mw)]
if len(week_times):
    in_week = (lstm_times >= week_times[0]) & (lstm_times <= week_times[-1])
    lstm_week_pred = y_pred_mv_mw[in_week, 0]  # first step of each forecast
    ax.plot(lstm_times[in_week],
            lstm_week_pred,
            label="LSTM + Weather", linewidth=1.5,
            linestyle="-.", color="#5FCCDB")

ax.set_title("One Week Comparison: Actual vs. GB vs. LSTM+Weather")
ax.set_ylabel("Load (MW)")
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Save the trained model weights
torch.save(model.state_dict(), "load_forecaster.pt")

# Load them back into a new model instance.
# The fresh LoadForecaster lives on the CPU, so map the checkpoint tensors to
# the CPU as well; this also lets a checkpoint saved on a GPU machine be
# loaded on a host without CUDA.
model = LoadForecaster()
model.load_state_dict(torch.load("load_forecaster.pt", map_location="cpu"))
model.eval()  # inference mode: disables dropout