# 72-Hour Renewable Energy Forecast (CycleLSTM) using ENTSO-E Data

**Project Overview:**
This notebook implements a 72-hour renewable energy forecast using a **CycleLSTM** model in **PyTorch**. It is a refactored version of the original CNN-LSTM project, incorporating the CycleNet architecture to better handle seasonal patterns in the time series data.

**Key Components:**
1.  **Data Loading:** Uses a PyTorch `Dataset` and `DataLoader` for efficient data handling.
2.  **Model:** Implements the `CycleLSTMModel`, which includes a `RecurrentCycle` component that learns a cyclical pattern, subtracts it from the input window before the LSTM layers, and adds the corresponding part of the cycle back to the forecast.
3.  **Training:** A custom PyTorch training loop with a learning rate scheduler (`ReduceLROnPlateau`) and early stopping is used.

In [1]:
# Cell 1: Necessary Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import os

ModuleNotFoundError: No module named 'torch'

In [None]:
# Cell 2: Configuration

# Notebook-wide settings, grouped by concern: which data to load, how the
# model is shaped, and how it is optimised.
config = dict(
    data_params=dict(
        country_code='DE',
        years_history=5,
        target_variable='renewable_percentage',
    ),
    model_params=dict(
        input_length=72,         # look-back window: past 72 hours (3 days)
        output_length=72,        # forecast horizon: next 72 hours
        lstm_input_size=1,       # number of input features
        hidden_size=64,
        num_layers=2,
        dropout=0.2,
        cycle_len=24,            # daily seasonality for hourly data
        cycle_channel_size=1,
    ),
    training_params=dict(
        # Prefer the GPU whenever PyTorch can see one.
        device='cuda' if torch.cuda.is_available() else 'cpu',
        batch_size=32,
        num_epochs=60,
        initial_learning_rate=0.001,
        patience_lr=2,
        lr_reduction=0.1,
        min_learning_rate=1e-05,
        early_stopping_patience=3,
    ),
)

# The CSV name is derived from the country code and the number of years.
DATA_FILENAME = f"energy_data_{config['data_params']['country_code']}_{config['data_params']['years_history']}years.csv"
print(f"Using device: {config['training_params']['device']}")

### Data Loading and Preprocessing

In [None]:
# Cell 3: Data Handling and Splitting Functions (from dataloader.py)

def train_val_test_split(data, train_ratio=0.7, val_ratio=0.15):
    """Cut ``data`` into consecutive train, validation and test parts.

    The leading ``train_ratio`` share of the rows forms the training part, the
    next ``val_ratio`` share the validation part, and everything that is left
    forms the test part. Both shares are truncated to whole row counts, and
    the original order of the rows is kept.
    """
    n_rows = len(data)
    first_cut = int(n_rows * train_ratio)
    second_cut = first_cut + int(n_rows * val_ratio)
    return data[:first_cut], data[first_cut:second_cut], data[second_cut:]

def fit_scaler(train_data):
    """Return a new ``MinMaxScaler`` fitted on ``train_data``."""
    # ``fit`` hands back the fitted estimator itself, so it can be returned directly.
    return MinMaxScaler().fit(train_data)

In [None]:
# Cell 4: PyTorch Dataset and DataLoader (from dataloader.py)

class TimeSeriesDataset(Dataset):
    """Sliding-window view over a scaled time series.

    Item ``i`` is a triple ``(x, y, cycle_idx)`` where ``x`` holds
    ``input_length`` consecutive rows starting at row ``i``, ``y`` holds the
    ``output_length`` rows that follow, and ``cycle_idx`` is the position of
    row ``i`` within the repeating cycle.

    The cycle position is counted from the first row of ``data``, i.e. row
    ``i`` gets position ``i % cycle_len``.

    Args:
        data: Raw series; it is passed through ``scaler.transform`` once.
        input_length: Number of rows in each input window.
        output_length: Number of rows in each target window.
        scaler: Already fitted object exposing ``transform``.
        cycle_len: Length of the repeating cycle, in rows.
    """

    def __init__(self, data, input_length, output_length, scaler, cycle_len):
        self.data = scaler.transform(data)
        self.input_length = input_length
        self.output_length = output_length
        self.cycle_len = cycle_len
        self.cycle_index = (np.arange(len(self.data)) % self.cycle_len).reshape(-1, 1)

    def __len__(self):
        """Return the number of complete (input, target) windows, never below 0."""
        # A series shorter than one full window would give a negative count,
        # which makes ``len()`` raise; report an empty dataset instead.
        return max(0, len(self.data) - self.input_length - self.output_length + 1)

    def __getitem__(self, idx):
        """Return ``(x, y, cycle_idx)`` for window ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` does not address a complete window.
        """
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        # Slicing past the end would silently yield truncated windows.
        if not 0 <= idx < n_windows:
            raise IndexError(f"window index {idx} out of range for {n_windows} windows")

        split = idx + self.input_length
        x = self.data[idx:split]
        y = self.data[split:split + self.output_length]
        cycle_idx = self.cycle_index[idx]
        return np.array(x, dtype=np.float32), np.array(y, dtype=np.float32), np.array(cycle_idx, dtype=np.int32)


def create_data_loader(data, input_length, output_length, batch_size, scaler, cycle_len):
    """Wrap ``data`` in a ``TimeSeriesDataset`` and return a ``DataLoader`` over it.

    Batches contain ``batch_size`` windows and are served in their original
    order (shuffling is turned off).
    """
    windows = TimeSeriesDataset(
        data=data,
        input_length=input_length,
        output_length=output_length,
        scaler=scaler,
        cycle_len=cycle_len,
    )
    return DataLoader(windows, batch_size=batch_size, shuffle=False)

In [None]:
# Cell 5: Load and Prepare Data

print(f"Loading data from file: {DATA_FILENAME}")
try:
    cached_data = pd.read_csv(DATA_FILENAME, index_col=0, parse_dates=True)
except FileNotFoundError:
    print(f"CRITICAL: Data file not found at '{os.path.abspath(DATA_FILENAME)}'.")
    print("Please ensure the file exists.")
    renewable_series = None
else:
    # Double brackets keep a single-column DataFrame, so ``.values`` is 2-D.
    renewable_series = cached_data[[config['data_params']['target_variable']]]
    print("Data loaded successfully.")

if renewable_series is not None:
    model_cfg = config['model_params']
    series_cycle_len = model_cfg['cycle_len']

    # Split data chronologically
    train_data, val_data, test_data = train_val_test_split(renewable_series.values)

    # Every loader counts the cycle position from the first row it is given.
    # The validation and test splits usually do not begin a whole number of
    # cycles after the start of the training split, which would shift the
    # learned cycle at evaluation time. Dropping the few leading rows (fewer
    # than one cycle) makes each split start on the same phase as training.
    val_start = len(train_data)
    test_start = val_start + len(val_data)
    val_data = val_data[(-val_start) % series_cycle_len:]
    test_data = test_data[(-test_start) % series_cycle_len:]

    print(f"Train data length: {len(train_data)}")
    print(f"Validation data length: {len(val_data)}")
    print(f"Test data length: {len(test_data)}\n")

    # Fit scaler only on training data so no information leaks from later splits
    scaler = fit_scaler(train_data)

    # Create DataLoaders; all three share the same window and batch settings
    loader_settings = (
        model_cfg['input_length'],
        model_cfg['output_length'],
        config['training_params']['batch_size'],
        scaler,
        series_cycle_len,
    )
    train_loader = create_data_loader(train_data, *loader_settings)
    val_loader = create_data_loader(val_data, *loader_settings)
    test_loader = create_data_loader(test_data, *loader_settings)

    print("DataLoaders created.")

### CycleLSTM Model Definition

In [None]:
# Cell 6: RecurrentCycle and CycleLSTM Model (from cyclenet_LSTM.py)

class RecurrentCycle(nn.Module):
    """Learnable table with one row per cycle step and one column per channel."""

    def __init__(self, cycle_len, channel_size):
        super().__init__()
        self.cycle_len = cycle_len
        self.channel_size = channel_size
        # The table starts at zero and is trained with the rest of the model.
        self.data = nn.Parameter(torch.zeros(cycle_len, channel_size), requires_grad=True)

    def forward(self, index, length):
        """Return ``length`` consecutive table rows starting at each ``index``.

        ``index`` is flattened to one start position per sample; positions
        running past the end of the table wrap around to its beginning. The
        result has one row of ``length`` table entries per start position.
        """
        starts = index.view(-1, 1)
        steps = torch.arange(length, device=index.device).view(1, -1)
        positions = torch.remainder(starts + steps, self.cycle_len)
        return self.data[positions]

class CycleLSTMModel(nn.Module):
    """LSTM forecaster that removes a learned cycle before the LSTM and restores it after.

    The input window has the learned cycle subtracted, the last LSTM output is
    projected to ``output_size`` values, and the cycle values for the forecast
    steps are added back on top.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, cycle_len, cycle_channel_size, seq_len, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.output_size = output_size
        self.cycleQueue = RecurrentCycle(cycle_len=cycle_len, channel_size=cycle_channel_size)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, index, cycle_len):
        """Forecast ``output_size`` steps from window ``x`` starting at cycle position ``index``."""
        # Remove the cyclic component covering the input window.
        history_cycle = self.cycleQueue(index, self.seq_len)
        residual = x - history_cycle

        # Explicit all-zero hidden and cell states on the input's device.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        initial_state = (
            torch.zeros(state_shape, device=x.device),
            torch.zeros(state_shape, device=x.device),
        )

        features, _ = self.lstm(residual, initial_state)
        # Only the final time step feeds the projection layer.
        forecast = self.fc(features[:, -1, :])

        # The forecast begins ``seq_len`` steps after the window start.
        future_start = (index + self.seq_len) % cycle_len
        future_cycle = self.cycleQueue(future_start, self.output_size)
        # squeeze(2) drops the channel axis so the cycle matches the forecast shape.
        return forecast + future_cycle.squeeze(2)

### Training and Evaluation Functions

In [None]:
# Cell 7: Training Loop (from cyclenet_LSTM.py)

def train_model_loop(
    model, train_loader, val_loader, num_epochs, learning_rate, device,
    cycle_len, output_size, patience, factor, min_lr, early_stopping_patience
):
    """Train ``model`` with Adam, LR reduction on plateau and early stopping.

    Args:
        model: Module called as ``model(x, index, cycle_len)``.
        train_loader: Iterable of ``(x, y, index)`` batches used for optimisation.
        val_loader: Iterable of ``(x, y, index)`` batches used for validation.
        num_epochs: Maximum number of epochs.
        learning_rate: Initial Adam learning rate.
        device: Device the model and batches are moved to.
        cycle_len: Cycle length forwarded to the model on every call.
        output_size: Accepted for interface compatibility; not used here.
        patience: Epochs without improvement before the LR is reduced.
        factor: Multiplier applied to the LR on reduction.
        min_lr: Lower bound for the LR.
        early_stopping_patience: Epochs without improvement before stopping.

    Returns:
        ``(model, history)`` where ``model`` carries the weights of the epoch
        with the lowest validation loss and ``history`` holds the per-epoch
        ``"train_loss"`` and ``"val_loss"`` lists.

    Raises:
        ValueError: If either loader yields no batches.
    """
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=patience, factor=factor, min_lr=min_lr)

    history = {"train_loss": [], "val_loss": []}
    best_val_loss = float('inf')
    early_stopping_counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        train_loss = _run_epoch(model, train_loader, criterion, device, cycle_len, optimizer)
        history["train_loss"].append(train_loss)

        val_loss = _run_epoch(model, val_loader, criterion, device, cycle_len)
        history["val_loss"].append(val_loss)

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stopping_counter = 0
            # state_dict() returns references to the live tensors, which later
            # optimiser steps overwrite in place; clone them for a real snapshot.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            early_stopping_counter += 1

        current_lr = optimizer.param_groups[0]['lr']
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.5f}, Val Loss: {val_loss:.5f}, LR: {current_lr:.6f}")

        if early_stopping_counter >= early_stopping_patience:
            print(f"Early stopping triggered at epoch {epoch+1}.")
            break

    # Restore the best weights whether training stopped early or ran all epochs.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model, history


def _run_epoch(model, loader, criterion, device, cycle_len, optimizer=None):
    """Run one pass over ``loader`` and return the mean per-batch loss.

    The pass updates the weights when ``optimizer`` is given; otherwise the
    model is put in eval mode and no gradients are tracked.
    """
    n_batches = len(loader)
    if n_batches == 0:
        raise ValueError(
            "Data loader yielded no batches; the split is probably shorter "
            "than input_length + output_length."
        )

    training = optimizer is not None
    model.train(training)
    running_loss = 0.0
    with torch.set_grad_enabled(training):
        for x, y, x_idx in loader:
            x, y, x_idx = x.to(device), y.to(device), x_idx.to(device)
            outputs = model(x, x_idx, cycle_len)
            # Targets carry a trailing feature axis that the outputs lack.
            loss = criterion(outputs, y.squeeze(2))
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            running_loss += loss.item()
    return running_loss / n_batches

In [None]:
# Cell 8: Evaluation and Plotting Functions

def evaluate_model(model, data_loader, device, cycle_len, scaler):
    """Run ``model`` over ``data_loader`` and score it in the original data scale.

    Args:
        model: Module called as ``model(x, index, cycle_len)``.
        data_loader: Iterable of ``(x, y, index)`` batches.
        device: Device the model and batches are moved to.
        cycle_len: Cycle length forwarded to the model.
        scaler: Fitted scaler used to undo the scaling; assumed to have been
            fitted on a single feature column.

    Returns:
        ``(metrics, predictions, targets)`` where ``metrics`` maps ``"MAE"``,
        ``"MSE"`` and ``"RMSE"`` to floats computed over all forecast steps,
        and the two arrays hold one row per sample in the original scale.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    # Keep the model on the same device the batches are moved to.
    model.to(device)
    model.eval()
    all_predictions, all_targets = [], []

    with torch.no_grad():
        for x, y, idx in data_loader:
            x, y, idx = x.to(device), y.to(device), idx.to(device)
            outputs = model(x, idx, cycle_len)
            all_predictions.append(outputs.cpu().numpy())
            all_targets.append(y.squeeze(2).cpu().numpy())

    if not all_predictions:
        raise ValueError("data_loader produced no batches to evaluate")

    all_predictions = np.concatenate(all_predictions)
    all_targets = np.concatenate(all_targets)

    # Inverse transform to get original scale. The scaler works on feature
    # columns, so the (samples, horizon) arrays are flattened into a single
    # column for the call and restored to their shape afterwards.
    all_predictions_inv = scaler.inverse_transform(
        all_predictions.reshape(-1, 1)
    ).reshape(all_predictions.shape)
    all_targets_inv = scaler.inverse_transform(
        all_targets.reshape(-1, 1)
    ).reshape(all_targets.shape)

    y_true_flat = all_targets_inv.flatten()
    y_pred_flat = all_predictions_inv.flatten()

    mse = mean_squared_error(y_true_flat, y_pred_flat)
    metrics = {
        "MAE": mean_absolute_error(y_true_flat, y_pred_flat),
        "MSE": mse,
        "RMSE": np.sqrt(mse)
    }
    return metrics, all_predictions_inv, all_targets_inv

def plot_training_history(history):
    """Draw the per-epoch training and validation loss curves on one figure.

    ``history`` must provide the ``"train_loss"`` and ``"val_loss"`` sequences.
    """
    curves = (
        ("train_loss", "Train Loss"),
        ("val_loss", "Validation Loss"),
    )

    plt.figure(figsize=(10, 6))
    for key, label in curves:
        plt.plot(history[key], label=label)

    plt.title("Training and Validation Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss (MSE)")
    plt.legend()
    plt.grid(True)
    plt.show()

def plot_predictions(predictions, targets, num_samples=3):
    """Plot forecast against actual values for randomly chosen samples.

    Args:
        predictions: 2-D array with one forecast per row.
        targets: 2-D array of actual values, aligned row by row with ``predictions``.
        num_samples: Number of distinct samples to draw; capped at the number
            of available rows.

    Nothing is drawn when there are no rows or ``num_samples`` is not positive.
    """
    available = len(predictions)
    num_samples = min(num_samples, available)
    if num_samples <= 0:
        print("No predictions available to plot.")
        return

    # Sample without replacement so the same forecast is never shown twice.
    chosen = np.random.choice(available, size=num_samples, replace=False)

    plt.figure(figsize=(15, 5 * num_samples))
    for position, sample_idx in enumerate(chosen, start=1):
        plt.subplot(num_samples, 1, position)
        plt.plot(targets[sample_idx, :], label="Actual Future", marker='.')
        plt.plot(predictions[sample_idx, :], label="Predicted Future", marker='x', linestyle='--')
        plt.title(f"Forecast vs Actuals (Test Sample {sample_idx})")
        plt.xlabel("Time (Hours into future)")
        plt.ylabel("Renewable Percentage (%)")
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    plt.show()

### Model Training and Final Evaluation

In [None]:
# Cell 9: Run Training and Evaluation

if renewable_series is not None:
    model_cfg = config['model_params']
    train_cfg = config['training_params']

    # Initialize the model
    model = CycleLSTMModel(
        input_size=model_cfg['lstm_input_size'],
        hidden_size=model_cfg['hidden_size'],
        num_layers=model_cfg['num_layers'],
        output_size=model_cfg['output_length'],
        cycle_len=model_cfg['cycle_len'],
        cycle_channel_size=model_cfg['cycle_channel_size'],
        seq_len=model_cfg['input_length'],
        dropout=model_cfg['dropout']
    )

    print("--- Starting CycleLSTM Model Training ---\n")
    # The config keys do not match the parameter names of train_model_loop
    # (and batch_size is not a parameter at all), so each argument is mapped
    # explicitly instead of unpacking the training section.
    trained_model, history = train_model_loop(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        num_epochs=train_cfg['num_epochs'],
        learning_rate=train_cfg['initial_learning_rate'],
        device=train_cfg['device'],
        cycle_len=model_cfg['cycle_len'],
        output_size=model_cfg['output_length'],
        patience=train_cfg['patience_lr'],
        factor=train_cfg['lr_reduction'],
        min_lr=train_cfg['min_learning_rate'],
        early_stopping_patience=train_cfg['early_stopping_patience'],
    )

    # Plot training history
    plot_training_history(history)

    # Final evaluation on the test set
    print("\n--- Final Model Evaluation on Test Set ---")
    metrics, predictions, targets = evaluate_model(
        trained_model,
        test_loader,
        train_cfg['device'],
        model_cfg['cycle_len'],
        scaler
    )

    print("\nOverall Test Set Metrics:")
    for metric_name, value in metrics.items():
        print(f"  {metric_name}: {value:.4f}")

    # Plot some predictions
    print("\n--- Sample Predictions vs Actuals ---")
    plot_predictions(predictions, targets)
else:
    print("Data not loaded. Cannot run training and evaluation.")