In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from torch import nn
from copy import deepcopy as dc
from torch.utils.data import DataLoader
import torch
import tqdm

from helper_functions.model_builder import LSTM, VanillaDNN
from helper_functions.data_setup import TimeseriesDataset
from helper_functions.engine import train
from torchinfo import summary

import warnings
warnings.filterwarnings("ignore")

In [3]:
df = pd.read_csv("Datasets/Dataframes_one_ex_var/ModelBenchmark_dataframe.csv")
df.head(), df.shape

(                  Date  Price  FR Generation  FR Load
 0  2018-01-01 00:00:00   4.74        53625.0  56250.0
 1  2018-01-01 01:00:00   3.66        52398.0  54300.0
 2  2018-01-01 02:00:00   1.26        51825.0  53600.0
 3  2018-01-01 03:00:00 -20.10        50729.0  50000.0
 4  2018-01-01 04:00:00 -31.82        50719.0  47100.0,
 (43824, 4))

In [4]:
# Set Date as index
df.Date = pd.to_datetime(df.Date)
df.set_index("Date", inplace=True)

In [5]:
# add dummy variables for weekdays + 7 features
names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
for i, x in enumerate(names):
    df[x] = (df.index.get_level_values(0).weekday == i).astype(int)

# add lagged variables for price + exogenous variables + (92 + 72*N) features
def prepare_dataframe_laggs(df: pd.DataFrame, historical_price_laggs: list =  [1,2,3,7], historical_exo_laggs: list = [1,7]):
    df = dc(df)

    exo_var = df.columns[1:-7]

    laggs = []
    for i in historical_price_laggs:
        new_laggs = [x for x in list(range(i, 24*i)) if x not in list(range(i-1, 24*(i-1)))]
        laggs += new_laggs

    for i in laggs:
        df[f'Price(t-{i})'] = df["Price"].shift(i)

    laggs = []
    for i in historical_exo_laggs:
        new_laggs = [x for x in list(range(i, 24*i)) if x not in list(range(i-1, 24*(i-1)))]
        laggs += new_laggs

    for exo in exo_var:
        for j in laggs:
            df[f'{exo}(t-{j})'] = df[exo].shift(j)

    return df

lagged_df = prepare_dataframe_laggs(df, historical_price_laggs=[1,2,3,7], historical_exo_laggs=[1,7])
lagged_df.dropna(inplace=True)
lagged_df.head()

Unnamed: 0_level_0,Price,FR Generation,FR Load,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday,...,FR Load(t-158),FR Load(t-159),FR Load(t-160),FR Load(t-161),FR Load(t-162),FR Load(t-163),FR Load(t-164),FR Load(t-165),FR Load(t-166),FR Load(t-167)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-07 23:00:00,18.91,72256.0,63350.0,0,0,0,0,0,0,1,...,46950.0,46400.0,46000.0,45850.0,45850.0,47100.0,50000.0,53600.0,54300.0,56250.0
2018-01-08 00:00:00,15.15,71126.0,59650.0,1,0,0,0,0,0,0,...,49100.0,46950.0,46400.0,46000.0,45850.0,45850.0,47100.0,50000.0,53600.0,54300.0
2018-01-08 01:00:00,15.11,70217.0,57050.0,1,0,0,0,0,0,0,...,51500.0,49100.0,46950.0,46400.0,46000.0,45850.0,45850.0,47100.0,50000.0,53600.0
2018-01-08 02:00:00,13.42,70273.0,56200.0,1,0,0,0,0,0,0,...,54000.0,51500.0,49100.0,46950.0,46400.0,46000.0,45850.0,45850.0,47100.0,50000.0
2018-01-08 03:00:00,12.91,70608.0,53300.0,1,0,0,0,0,0,0,...,55100.0,54000.0,51500.0,49100.0,46950.0,46400.0,46000.0,45850.0,45850.0,47100.0


In [6]:
lagged_df_price_np = lagged_df.iloc[:,:105]
lagged_df_price_np.drop(df.columns[1:10],axis = 1, inplace=True)
lagged_df_price_np.shape

(43657, 96)

In [7]:
from sklearn.preprocessing import MinMaxScaler
# Transform features by scaling each feature to a given range. 
scaler = MinMaxScaler(feature_range=(-1,1))
lagged_df_price_np = scaler.fit_transform(lagged_df_price_np)
lagged_df_price_np

array([[-0.72783405, -0.71140919, -0.70597479, ..., -0.76362401,
        -0.75875739, -0.75656741],
       [-0.73545843, -0.72783405, -0.71140919, ..., -0.80693697,
        -0.76362401, -0.75875739],
       [-0.73553954, -0.73545843, -0.72783405, ..., -0.83070231,
        -0.80693697, -0.76362401],
       ...,
       [-0.76133264, -0.75559408, -0.74478612, ..., -0.53884681,
        -0.52228001, -0.54154373],
       [-0.76591538, -0.76133264, -0.75559408, ..., -0.54991838,
        -0.53884681, -0.52228001],
       [-0.77508086, -0.76591538, -0.76133264, ..., -0.58197727,
        -0.54991838, -0.53884681]])

In [8]:
X = lagged_df_price_np[:, 1:]
y = lagged_df_price_np[:, 0]

# Test set = 2021/01/01 tot 2022/12/12
split_index = df.index.get_loc("2021-01-01 00:00:00")

X_train = X[:split_index]
X_test = X[split_index:]

y_train = y[:split_index]
y_test = y[split_index:]

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((26304, 95), (17353, 95), (26304,), (17353,))

In [14]:
X_train = X_train.reshape((-1, 95, 1))
X_test = X_test.reshape((-1, 95, 1))

y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((26304, 95, 1), (17353, 95, 1), (26304, 1), (17353, 1))

In [15]:
X_train = torch.tensor(X_train).float()
X_test = torch.tensor(X_test).float()
y_train = torch.tensor(y_train).float()
y_test = torch.tensor(y_test).float()

X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([26304, 95, 1]),
 torch.Size([17353, 95, 1]),
 torch.Size([26304, 1]),
 torch.Size([17353, 1]))

In [16]:
train_dataset = TimeseriesDataset(X_train, y_train, seq_len=2688)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 64, shuffle = False)

In [17]:
test_dataset = TimeseriesDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [19]:
model = VanillaDNN(1,239, 1)
model.to(device)
model

VanillaDNN(
  (layer_stack): Sequential(
    (0): Linear(in_features=1, out_features=239, bias=True)
    (1): ReLU()
    (2): Linear(in_features=239, out_features=239, bias=True)
    (3): ReLU()
    (4): Linear(in_features=239, out_features=239, bias=True)
    (5): ReLU()
    (6): Linear(in_features=239, out_features=1, bias=True)
  )
)

In [20]:
learning_rate = 0.01
num_epochs = 10
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Start training with help from engine.py
train(model=model,
             train_dataloader=train_loader,
             test_dataloader=test_loader,
             loss_fn=loss_function,
             optimizer=optimizer,
             epochs=10,
             device=device)

  0%|          | 0/10 [00:00<?, ?it/s]