In [1]:
import numpy as np
import pandas as pd
import holidays

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from load_data import load_data, split_data

data = load_data("1h")
all_columns = data.keys()
all_columns

Index(['MWh', 'solar_fore_de [MW]', 'solar_fore_it [MW]', 'wind_fore_de [MW]',
       'wind_fore_it [MW]', 'CH_AT', 'CH_DE', 'CH_FR', 'CH_IT', 'AT_CH',
       'DE_CH', 'FR_CH', 'IT_CH'],
      dtype='object')

In [2]:
columns_to_drop = [
    "CH_AT",
    "CH_DE",
    "CH_FR",
    "CH_IT",
    "AT_CH",
    "DE_CH",
    "FR_CH",
    "IT_CH",
]

In [3]:
data_filtered = data.drop(columns=columns_to_drop)
data_filtered

Unnamed: 0,MWh,solar_fore_de [MW],solar_fore_it [MW],wind_fore_de [MW],wind_fore_it [MW]
2019-01-01 00:00:00+00:00,129.716036,0.0,0.0,23052.3310,4596.5916
2019-01-01 01:00:00+00:00,133.398074,0.0,0.0,24969.9701,4478.5564
2019-01-01 02:00:00+00:00,135.133852,0.0,0.0,27082.9626,4323.3712
2019-01-01 03:00:00+00:00,131.699424,0.0,0.0,26890.9717,4231.8283
2019-01-01 04:00:00+00:00,147.391128,0.0,0.0,27740.1555,4266.3082
...,...,...,...,...,...
2021-12-31 18:00:00+00:00,171.707318,0.0,0.0,36997.7200,1108.4000
2021-12-31 19:00:00+00:00,159.462903,0.0,0.0,35666.9300,1077.9700
2021-12-31 20:00:00+00:00,155.109520,0.0,0.0,34383.8800,1048.2800
2021-12-31 21:00:00+00:00,171.370277,0.0,0.0,33075.2500,1078.7800


In [4]:
# Function to determine the season
def determine_season(month):
    if month in [12, 1, 2]:
        return 'Winter'
    elif month in [3, 4, 5]:
        return 'Spring'
    elif month in [6, 7, 8]:
        return 'Summer'
    else:
        return 'Autumn'

# Function to determine if the date is a public holiday in Switzerland
def is_public_holiday(date):
    swiss_holidays = holidays.Switzerland(years=date.year)
    return 1 if date in swiss_holidays else 0

# Function to determine if the date is a special day in Switzerland (you can define your own special days)
def is_special_day(date):
    special_days = [pd.Timestamp(f'{date.year}-08-01'), pd.Timestamp(f'{date.year}-12-25'), pd.Timestamp(f'{date.year}-01-01')]
    return 1 if date in special_days else 0

# Add feature columns
def add_feature_columns(df):

    # Public Holidays and Special days
    df['Public_Holiday'] = df.index.to_series().map(is_public_holiday)
    df['Special_Day'] = df.index.to_series().map(is_special_day)

    return df

In [5]:
# Assuming merged_df is your DataFrame with datetime index
data_filtered = add_feature_columns(data_filtered)
data_filtered

Unnamed: 0,MWh,solar_fore_de [MW],solar_fore_it [MW],wind_fore_de [MW],wind_fore_it [MW],Public_Holiday,Special_Day
2019-01-01 00:00:00+00:00,129.716036,0.0,0.0,23052.3310,4596.5916,1,0
2019-01-01 01:00:00+00:00,133.398074,0.0,0.0,24969.9701,4478.5564,1,0
2019-01-01 02:00:00+00:00,135.133852,0.0,0.0,27082.9626,4323.3712,1,0
2019-01-01 03:00:00+00:00,131.699424,0.0,0.0,26890.9717,4231.8283,1,0
2019-01-01 04:00:00+00:00,147.391128,0.0,0.0,27740.1555,4266.3082,1,0
...,...,...,...,...,...,...,...
2021-12-31 18:00:00+00:00,171.707318,0.0,0.0,36997.7200,1108.4000,0,0
2021-12-31 19:00:00+00:00,159.462903,0.0,0.0,35666.9300,1077.9700,0,0
2021-12-31 20:00:00+00:00,155.109520,0.0,0.0,34383.8800,1048.2800,0,0
2021-12-31 21:00:00+00:00,171.370277,0.0,0.0,33075.2500,1078.7800,0,0


In [6]:
train_df = data_filtered[: 2 * 8760]
test_df = data_filtered[2 * 8760 :]

X_train, metadata_train, y_train = split_data(train_df, 6 * 24, 24, 1, "MWh")
X_test, metadata_test, y_test = split_data(test_df, 6 * 24, 24, 1, "MWh")

X_train.shape, metadata_train.shape, y_train.shape, X_test.shape

((17353, 144), (17353, 7, 24), (17353, 24), (8616, 144))

In [9]:
X_train[0]

array([129.716036  , 133.39807446, 135.13385173, 131.69942406,
       147.39112815, 167.62953552, 189.13669274, 197.98881083,
       193.20514532, 188.10869616, 177.24680874, 147.49166408,
       146.29581829, 173.66894928, 204.10060079, 197.34109108,
       174.24725261, 167.02796827, 160.99376635, 189.93669224,
       188.36530115, 197.98602986, 180.19370578, 163.367636  ,
       160.25457549, 157.62713061, 162.55583797, 168.5107323 ,
       193.24578895, 219.948629  , 204.4927437 , 201.29854519,
       175.79006814, 165.71714717, 162.91946957, 124.85653726,
       145.77554275, 168.4304249 , 172.78109922, 159.01470195,
       137.56694132, 155.30014968, 175.18890453, 194.07170298,
       178.69874381, 186.47962334, 179.72249308, 167.05302653,
       161.49163523, 164.5678679 , 144.99496507, 133.22600848,
       161.83418257, 171.51103709, 137.42954213, 163.07656589,
       135.7202814 , 126.17952166, 132.81393511, 106.11378611,
       105.42675655, 105.16959734, 101.94593473, 102.24

In [10]:
metadata_train[0][0]

array([189.41318582, 198.63061968, 171.68787113, 177.3757639 ,
       180.65097994, 126.54668446, 106.49979841, 111.98586163,
       114.50757033,  97.86564475,  96.35556294,  82.14225524,
        90.22833264, 100.03679758,  99.51644132,  96.52042397,
       104.12986492, 104.28193521, 113.16937373, 121.38583381,
       148.7318302 , 157.8023946 , 179.52638759, 142.91166115])

In [7]:
X_train = torch.Tensor(X_train)[:, :].to(device='cuda')
X_test = torch.Tensor(X_test)[:, :].to(device='cuda')
y_train = torch.Tensor(y_train)[:, :].to(device='cuda')
y_test = torch.Tensor(y_test)[:, :].to(device='cuda')

In [8]:
mean = X_train.mean()
std = X_train.std()

X_train -= mean
X_train /= std
X_test -= mean
X_test /= std

In [9]:
class AirModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
        )
        self.linear = nn.Linear(hidden_size, 24)

    def forward(self, x):
        out = x
        x, _ = self.gru(x)
        x = self.linear(x)
        return x
    
        # def forward(self, x):
    #     out = x
    #     for _ in range(4):
    #         x = out
    #         x, _ = self.gru(x)
    #         x = self.linear(x)
    #         out = torch.roll(out, -6, dims=-1)
    #         out[..., -6:] = x[:, :]
    #     return out[:, -24:]

In [None]:
## import torch.optim as optim
from torch import utils

model = AirModel(144, 10, 2).to(device='cuda')
optimizer = optim.Adam(model.parameters())
loss_fn = nn.L1Loss()
loader = utils.data.DataLoader(
    utils.data.TensorDataset(X_train, y_train),
    shuffle=True,
    batch_size=1024,
)

n_epochs = 2000
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Validation
    if epoch % 100 != 0:
        continue
    model.eval()
    with torch.no_grad():
        y_pred = model(X_train)
        train_rmse = loss_fn(y_pred, y_train)
        y_pred = model(X_test)
        test_rmse = loss_fn(y_pred, y_test)
    print("Epoch %d: train MAE %.4f, test MAE %.4f" % (epoch, train_rmse, test_rmse))

Epoch 0: train MAE 106.7055, test MAE 98.1115
Epoch 100: train MAE 87.3998, test MAE 78.4579
Epoch 200: train MAE 68.2308, test MAE 59.6708
Epoch 300: train MAE 50.9565, test MAE 42.2600
Epoch 400: train MAE 38.7398, test MAE 30.0148
Epoch 500: train MAE 32.2776, test MAE 24.4620
Epoch 600: train MAE 25.6338, test MAE 21.3915
Epoch 700: train MAE 21.9100, test MAE 20.2173
Epoch 800: train MAE 20.2801, test MAE 20.2238
Epoch 900: train MAE 19.4417, test MAE 19.4486
Epoch 1000: train MAE 19.0344, test MAE 19.3346
Epoch 1100: train MAE 18.8268, test MAE 19.1946
Epoch 1200: train MAE 18.7263, test MAE 19.1406
Epoch 1300: train MAE 18.6761, test MAE 19.2112
Epoch 1400: train MAE 18.6651, test MAE 19.1617
Epoch 1500: train MAE 18.6535, test MAE 19.2310
Epoch 1600: train MAE 18.6144, test MAE 19.2485
Epoch 1700: train MAE 18.5777, test MAE 19.2021
