# Deep Learning for Time Series Forecasting : Introduction to RNN and LSTM

In this notebook, we will learn how to implement a recurrent neural network from scratch using PyTorch. We will also see how to use an LSTM to solve a multivariate time series forecasting problem.
The task is to forecast the next day's power consumption given the previous day's data.

## 1. RNN from scratch in a univariate time series forecasting problem with a recursive approach

#### Import and load data

In [None]:
# Import external modules
import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MinMaxScaler

# Data loader
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import torch
import torch.nn as nn

# Make pandas' built-in `.plot()` calls render through the plotly backend
pd.options.plotting.backend = "plotly"

In [None]:
# Location of the consolidated, cleaned dataset
processed_data_path = "data/"
processed_filename = os.path.join(processed_data_path, "clean-data-consolidated.csv")

# Load the CSV, parse the 'Date - Heure' column and use it as the row index
data = pd.read_csv(processed_filename)
data["Date - Heure"] = pd.to_datetime(data["Date - Heure"])
data = data.set_index("Date - Heure")

# Keep the "Île-de-France" rows only
data_iledefrance = data.loc[data["Région"] == "Île-de-France"]
# Series of interest: the 'Consommation brute électricité (MW) - RTE' column
ts_data = data_iledefrance["Consommation brute électricité (MW) - RTE"]

In [None]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

### Preprocessing of the data

The first requirement is to create the sequences. As a first approach, we will use the last 24 hours to predict the next hour. We will then use the Recursive Multi-Step Forecasting strategy to predict the next 24 hours.

In [None]:
# Rescale the consumption series to [-1, 1]; the scaler works on a 2-D column.
# NOTE(review): the scaler is fitted on the full series, i.e. before the
# train/test split performed later in this notebook — confirm this is intended.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(ts_data.values.reshape(-1, 1))
ts_scaled = scaler.transform(ts_data.values.reshape(-1, 1))


# Function to create sequences
def create_sequences_rec(data, seq_length):
    """Build one-step-ahead samples from a series with a sliding window.

    Each sample pairs `seq_length` consecutive observations with the single
    observation that immediately follows them.

    Args:
        data: Sliceable sequence of observations (for instance an array of
            shape (n_steps, n_features)).
        seq_length: Number of consecutive observations in each input window.

    Returns:
        A tuple `(X, y)` of NumPy arrays where `X[i]` is
        `data[i : i + seq_length]` and `y[i]` is `data[i + seq_length]`.
        Both arrays are empty when `data` has `seq_length` elements or fewer.
    """
    # The last usable window must still leave one observation as its target.
    n_samples = len(data) - seq_length
    xs = [data[i : i + seq_length] for i in range(n_samples)]
    ys = [data[i + seq_length] for i in range(n_samples)]
    return np.array(xs), np.array(ys)


# Window size: the last 24 hours are used to predict the next hour
seq_length = 24
X, y = create_sequences_rec(ts_scaled, seq_length)

# Chronological 80/20 split, converted to float32 tensors in one pass
train_size = int(0.8 * len(X))
X_train, y_train, X_test, y_test = (
    torch.tensor(part, dtype=torch.float32)
    for part in (X[:train_size], y[:train_size], X[train_size:], y[train_size:])
)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

# Shuffle the training windows once by hand (no DataLoader is used here);
# the test windows keep their chronological order
indices = torch.randperm(len(X_train))
X_train, y_train = X_train[indices], y_train[indices]

train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

print(f"Number of training samples: {len(train_data)}")
print(f"Number of test samples: {len(test_data)}")

### Model definition

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class RNN(nn.Module):
    """Minimal recurrent cell that the caller unrolls one time step at a time.

    At every step the current input is concatenated with the previous hidden
    state; one linear layer (followed by tanh) produces the next hidden state
    and another linear layer produces the output.

    Args:
        input_size: Number of features in the input at each time step.
        hidden_size: Number of units in the hidden state.
        output_size: Number of values produced at each time step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Both layers read the concatenation [input, previous hidden state].
        self.in2hidden = nn.Linear(input_size + hidden_size, hidden_size)
        self.in2output = nn.Linear(input_size + hidden_size, output_size)

    def forward(self, x, hidden_state):
        """Run a single time step.

        Args:
            x: Input of shape (batch, input_size).
            hidden_state: Previous hidden state of shape (batch, hidden_size).

        Returns:
            A tuple `(output, hidden)` of shapes (batch, output_size) and
            (batch, hidden_size).
        """
        combined = torch.cat((x, hidden_state), dim=1)
        # tanh keeps the recurrent state bounded in [-1, 1] across time steps.
        hidden = torch.tanh(self.in2hidden(combined))
        output = self.in2output(combined)
        return output, hidden

    def initHidden(self):
        """Return a fresh hidden state of shape (1, hidden_size).

        The leading dimension of size 1 is the batch dimension; the values
        are drawn with Kaiming-uniform initialisation.
        """
        return nn.init.kaiming_uniform_(torch.empty(1, self.hidden_size))


# One feature in, one value out, with 64 hidden units in between
input_size, hidden_size, output_size = 1, 64, 1

# Build the network, then move its parameters to the selected device
model = RNN(input_size, hidden_size, output_size)
model = model.to(device)

### Training

In [None]:
from tqdm import tqdm
import matplotlib.pyplot as plt

# Objective: mean squared error, minimised with Adam at a small learning rate
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

# Number of full passes over the training set
num_epochs = 100


def evaluate(model, X_test, y_test, criterion):
    """Return the average per-sequence loss of a step-wise recurrent model.

    Every sequence is processed from a fresh hidden state, one time step at a
    time; only the output of the last step is compared with the target.

    Args:
        model: Module exposing `initHidden()` and a forward pass
            `model(x_t, hidden) -> (output, hidden)`. It must own at least
            one parameter (used to find the device to run on).
        X_test: Tensor of input sequences, indexed as (sample, time step, ...).
        y_test: Tensor of targets, one entry per sequence.
        criterion: Loss called as `criterion(output, target)`.

    Returns:
        The sum of the per-sequence losses divided by `len(X_test)`.

    Raises:
        ZeroDivisionError: If `X_test` is empty.
    """
    model.eval()  # side effect: the model is left in evaluation mode
    # Run on whatever device the model lives on rather than on a global one.
    model_device = next(model.parameters()).device
    total_loss = 0
    with torch.no_grad():  # no need to track gradients
        for X_seq, y_target in zip(X_test, y_test):
            hidden = model.initHidden().to(model_device)
            X_seq = X_seq.to(model_device)
            y_target = y_target.to(model_device)
            # Unroll the cell over the whole sequence, keeping the last output.
            for x_t in X_seq:
                output, hidden = model(x_t.view(1, -1), hidden)
            output = output.view(-1)
            total_loss += criterion(output, y_target).item()
    return total_loss / len(X_test)


# Per-epoch average losses, kept for the learning curves plotted below
training_losses = []
testing_losses = []

# Training loop with evaluation
for epoch in tqdm(range(num_epochs), desc="Epochs"):
    model.train()
    train_loss = 0
    # One optimisation step per training window (i.e. a batch size of 1)
    for batch in range(len(X_train)):
        # Every window starts from a fresh hidden state
        hidden = model.initHidden().to(device)
        X_batch = X_train[batch].to(device)
        y_batch = y_train[batch].to(device)
        model.zero_grad()
        # Unroll the cell over the window; only the last output is scored
        for t in range(seq_length):
            output, hidden = model(X_batch[t].view(1, -1), hidden)
        output = output.view(-1)

        loss = criterion(output, y_batch)
        train_loss += loss.item()

        loss.backward()
        optimizer.step()
    avg_train_loss = train_loss / len(X_train)
    training_losses.append(avg_train_loss)

    # Evaluate on test data
    avg_test_loss = evaluate(model, X_test, y_test, criterion)
    testing_losses.append(avg_test_loss)

    # Print average losses for the epoch
    print(
        f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Test Loss: {avg_test_loss:.4f}"
    )

# Plotting the training and testing loss curves
plt.figure(figsize=(10, 5))
plt.plot(training_losses, label="Training Loss")
plt.plot(testing_losses, label="Testing Loss")
plt.title("Training and Testing Loss Curves")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

If you do not have access to a GPU, you can load the trained model instead:

In [None]:
# save model to disk
# torch.save(model.state_dict(), "rnn_model.pt")
# model = RNN(input_size, hidden_size, output_size).to(device)
# model.load_state_dict(torch.load("rnn_model.pt"))

In [None]:
def predict_recursive(model, initial_sequence, num_predictions):
    """Forecast several steps ahead by feeding predictions back as inputs.

    Starting from `initial_sequence`, the model predicts the next value; the
    window is then shifted by one step (oldest value dropped, prediction
    appended) and the process is repeated.

    Args:
        model: Module exposing `initHidden()` and a forward pass
            `model(x_t, hidden) -> (output, hidden)` whose output holds a
            single value.
        initial_sequence: Tensor of shape (window_length, 1) holding the
            observations the forecast starts from. It is not modified.
        num_predictions: Number of future values to produce.

    Returns:
        A list of `num_predictions` Python floats, on the same scale as
        `initial_sequence`.

    Note:
        Inference runs on the CPU. `Module.to` moves the module in place, so
        the model passed in is on the CPU (and in evaluation mode) afterwards.
    """
    model.eval()  # Set the model to evaluation mode
    model = model.to("cpu")
    input_sequence = initial_sequence.to("cpu")

    predictions = []
    # No gradients are needed; without this, the autograd graph would keep
    # growing through the predictions that are fed back as inputs.
    with torch.no_grad():
        for _ in range(num_predictions):
            # Each window starts from a fresh hidden state: consecutive
            # windows overlap, so carrying the state over would feed the
            # same observations to the cell twice.
            hidden = model.initHidden().to("cpu")
            for x_t in input_sequence:
                output, hidden = model(x_t.view(1, -1), hidden)
            # Slide the window: remove the first element, append the forecast
            input_sequence = torch.cat((input_sequence[1:], output.view(1, 1)))
            predictions.append(output.item())
    return predictions


# Start from the test window whose target is `y_test[-24]`, so that the 24
# recursive forecasts line up with the last 24 real values, `y_test[-24:]`.
predictions = predict_recursive(model, X_test[-24], 24)

In [None]:
# Ground truth: the last 24 targets of y_test, mapped back to the original scale
real_values_last_day = scaler.inverse_transform(
    y_test[-24:].cpu().numpy().reshape(-1, 1)
)
# The forecasts go through the same inverse scaling
predictions_rescaled = scaler.inverse_transform(
    np.asarray(predictions).reshape(-1, 1)
)

# Overlay both curves on a single figure
plt.figure(figsize=(12, 6))
for series, label, color, marker in (
    (real_values_last_day, "Real Values", "blue", "o"),
    (predictions_rescaled, "Predictions", "red", "x"),
):
    plt.plot(series, label=label, color=color, marker=marker)
plt.xlabel("Hour")
plt.ylabel("Electricity Consumption (MW)")
plt.title("Electricity Consumption: Real vs Predicted")
plt.legend()
plt.show()

## 2. LSTM in a univariate time series forecasting problem with a multi-output strategy

Now it is your turn: implement an LSTM directly with the LSTM module in PyTorch, using a multi-output strategy to predict the next 24 hours in a univariate time series forecasting problem.

In [None]:
def create_sequences(ts_scaled, input_len, prediction_len):
    """Build multi-output samples from a series with a sliding window.

    Each sample pairs `input_len` consecutive observations with the
    `prediction_len` observations that immediately follow them.

    Args:
        ts_scaled: Sliceable sequence of observations (for instance an array
            of shape (n_steps, n_features)).
        input_len: Number of observations in each input window.
        prediction_len: Number of observations in each target window.

    Returns:
        A tuple `(X, y)` of NumPy arrays where `X[i]` is
        `ts_scaled[i : i + input_len]` and `y[i]` is
        `ts_scaled[i + input_len : i + input_len + prediction_len]`.
        Both arrays are empty when the series is shorter than
        `input_len + prediction_len`.
    """
    # "+ 1" keeps the last window, whose target ends exactly at the end of
    # the series.
    n_samples = len(ts_scaled) - input_len - prediction_len + 1
    X = [ts_scaled[i : i + input_len] for i in range(n_samples)]
    y = [
        ts_scaled[i + input_len : i + input_len + prediction_len]
        for i in range(n_samples)
    ]
    return np.array(X), np.array(y)


# Window sizes: the last 24 hours are used to predict the following 24 hours
seq_length = 24
prediction_len = 24
X, y = create_sequences(ts_scaled, seq_length, prediction_len)

# Chronological 80/20 split, converted to float32 tensors in one pass
train_size = int(0.8 * len(X))
X_train, y_train, X_test, y_test = (
    torch.tensor(part, dtype=torch.float32)
    for part in (X[:train_size], y[:train_size], X[train_size:], y[train_size:])
)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

# Wrap the tensors as datasets
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

# Mini-batches of 32; only the training loader shuffles its samples
batch_size = 32
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class LSTM(nn.Module):
    """LSTM followed by a linear head, for multi-output forecasting.

    The whole input window is fed to `nn.LSTM`; the LSTM output at the last
    time step is projected to `output_size` values, so every step of the
    forecast horizon is produced in a single forward pass.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of units in each LSTM layer.
        output_size: Number of values predicted per sequence (the horizon).
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq_len, features)
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers=num_layers, batch_first=True
        )
        self.head = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of windows to a batch of forecasts.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size). Targets that carry a
            trailing feature dimension of size 1 need to be squeezed to this
            shape before computing a loss.
        """
        outputs, _ = self.lstm(x)
        # Summarise the window by the output at its last time step.
        return self.head(outputs[:, -1, :])


# TODO : Instantiate the model

In [None]:
# TODO : Loss and optimizer
# TODO : Training loop
# TODO : Evaluate on test data

## 3. LSTM in a multivariate time series forecasting problem

Now it is your turn: implement an LSTM directly with the LSTM module in PyTorch, using a multi-output strategy to predict the next 24 hours in a multivariate time series forecasting problem.
First, try to incorporate the different features you created in the previous notebook, for Île-de-France only.
Then prepare the data for all regions.