# First RNN

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple, Sequence

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
train_data = pd.read_csv("/Users/carlbechtolsheim/code/Niki827/watt_squad/raw_data/train.csv")
test_data = pd.read_csv("/Users/carlbechtolsheim/code/Niki827/watt_squad/raw_data/test.csv")

## Data preprocessing

In [44]:
import pandas as pd
import numpy as np

from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import FunctionTransformer

## lists of features:


# f_time = ['time']


## our custom functions:

def log_transformed(data):
    """ replaces values in columns in a dataframe with the log values """
    f_logs = [
    'precip_1h:mm',
    'prob_precip_1h:p',
    'clear_sky_rad:W',
    'clear_sky_energy_1h:J',
    'diffuse_rad:W',
    'diffuse_rad_1h:Wh',
    'direct_rad:W',
    'direct_rad_1h:Wh',
    'global_rad:W',
    'global_rad_1h:Wh',
    'wind_speed_2m:ms',
    'wind_speed_10m:ms',
    'wind_speed_50m:ms',
    'wind_speed_100m:ms'
]
    for col in f_logs:
        data[col] = np.log(data[col] + 1e-5)
    return data

def time_transformed(data):
    """takes a df and splits the 'time' feature into three features: hour, month, season;
    drops the original time column"""

    feature = pd.to_datetime(data.time)

    hour = feature.dt.hour
    month  = feature.dt.month

    def assign_season(month):
        if month in [3, 4, 5]:
            return 1  # Spring
        elif month in [6, 7, 8]:
            return 2  # Summer
        elif month in [9, 10, 11]:
            return 3  # Fall
        else:  # December, January, February
            return 4  # Winter

    season = month.apply(assign_season)
    hour_sine = np.sin(2 * np.pi * hour / 24)
    hour_cosine = np.cos(2 * np.pi * hour / 24)
    month_sine = np.sin(2 * np.pi * month / 12)
    month_cosine = np.cos(2 * np.pi * month / 12)
    season_sine = np.sin(2 * np.pi * season / 4)
    season_cosine = np.cos(2 * np.pi * season / 4)

    data["hour_sine"] = hour_sine
    data["hour_cosine"] = hour_cosine
    data["month_sine"] = month_sine
    data["month_cosine"] = month_cosine
    data["season_sine"] = season_sine
    data["season_cosine"] = season_cosine

    data = data.drop(columns=["time"])

    return data

def degree_transformed(data):
    """ takes a df 'data' and takes the features with degree units (in the specific list f_degree);
    creates a sin and cos column for each to make them cyclical. drops the original columns"""

    f_degree = ['sun_azimuth:d', 'wind_dir_2m:d', 'wind_dir_10m:d', 'wind_dir_50m:d', 'wind_dir_100m:d']

    for col in f_degree:
        sin_column = np.sin(2 * np.pi * data[col]/360)
        cos_column = np.cos(2 * np.pi * data[col]/360)

        data[f"sin_{col}"] = sin_column
        data[f"cos_{col}"] = cos_column
        data = data.drop(columns=[col])

    return data

def transform_data(data):
    """ applies the above three functions to the input dataframe """
    data = degree_transformed(time_transformed(log_transformed(data)))

    all_col = list(data.columns)

    # defining the columns we don't want in our X_train
    drop_col = ['pv_production',
            'wind_production',
            'consumption',
            'spot_market_price',
            'precip_type:idx']

    f_ohe = ['precip_type:idx']

    scale_col = [col for col in all_col if col not in drop_col and f_ohe]

    # defining our scalers
    minmax = MinMaxScaler()
    ohe = OneHotEncoder(handle_unknown='ignore', sparse_output = False)


    # our preproc pipline
    preproc = make_column_transformer(
        (ohe, f_ohe),
        (minmax, scale_col),
        remainder = "drop"
    )

    data_transformed = preproc.fit_transform(data)
    data_transformed = pd.DataFrame(data_transformed, columns=preproc.get_feature_names_out())
    data_transformed['onehotencoder__precip_type:idx_2.0'] = 0

    print('➡️ preprocessing done')
    return data_transformed



# ## building the pipeline

# data = pd.read_csv("raw_data/train.csv")

# # calling our custom functions on our dataframe
# data_ft = degree_transformed(time_transformed(log_transformed(data)))

# all_col = list(data_ft.columns)

# # defining the columns we don't want in our X_train
# drop_col = ['pv_production',
#             'wind_production',
#             'consumption',
#             'spot_market_price',
#             'precip_type:idx']

# # defining the columns we want to scale
# scale_col = [col for col in all_col if col not in drop_col and f_ohe]

# # defining our scalers
# minmax = MinMaxScaler()
# ohe = OneHotEncoder(handle_unknown='ignore', sparse_output = False)

# # our preproc pipline
# preproc = make_column_transformer(
#     (ohe, f_ohe),
#     (minmax, scale_col),
#     remainder = "drop"
# )

# data_transformed = preproc.fit_transform(data_ft)
# data_transformed = pd.DataFrame(data_transformed, columns=preproc.get_feature_names_out())


In [45]:
# creating y_train and y_test
y_train = train_data['wind_production'].copy()
y_test = test_data['wind_production'].copy()

# creating X_train and X_test
X_train = train_data
X_train = X_train.drop(columns=['pv_production', 'wind_production', 'consumption', 'spot_market_price'])
X_test = test_data
X_test = X_test.drop(columns=['pv_production', 'wind_production', 'consumption', 'spot_market_price'])

# Preprocessing features
X_train_transformed = transform_data(X_train)
X_test_transformed = transform_data(X_test)

➡️ preprocessing done
➡️ preprocessing done


In [46]:
TARGET = 'wind_production'

In [47]:
y_train.describe()

count    9515.000000
mean       21.254382
std        38.225468
min      -582.200000
25%        -0.260000
50%         2.370000
75%        26.040000
max       225.500000
Name: wind_production, dtype: float64

In [48]:
y_train[y_train<-3] = 0

In [49]:
y_train[y_train<-3]

Series([], Name: wind_production, dtype: float64)

In [50]:
df = X_train_transformed.copy()

In [51]:
df['wind_production'] = y_train

In [52]:
df

Unnamed: 0,onehotencoder__precip_type:idx_0.0,onehotencoder__precip_type:idx_1.0,onehotencoder__precip_type:idx_2.0,onehotencoder__precip_type:idx_3.0,minmaxscaler__precip_1h:mm,minmaxscaler__prob_precip_1h:p,minmaxscaler__clear_sky_rad:W,minmaxscaler__clear_sky_energy_1h:J,minmaxscaler__diffuse_rad:W,minmaxscaler__diffuse_rad_1h:Wh,...,minmaxscaler__cos_sun_azimuth:d,minmaxscaler__sin_wind_dir_2m:d,minmaxscaler__cos_wind_dir_2m:d,minmaxscaler__sin_wind_dir_10m:d,minmaxscaler__cos_wind_dir_10m:d,minmaxscaler__sin_wind_dir_50m:d,minmaxscaler__cos_wind_dir_50m:d,minmaxscaler__sin_wind_dir_100m:d,minmaxscaler__cos_wind_dir_100m:d,wind_production
0,1.0,0.0,0,0.0,0.0,0.0,0.757424,0.854846,0.781999,0.815432,...,0.038568,0.045382,0.291860,0.045382,0.291860,0.042169,0.299026,0.038731,0.307049,40.59
1,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.779972,0.000000,0.742604,...,0.095955,0.023405,0.348815,0.023405,0.348815,0.023669,0.347984,0.024203,0.346324,67.86
2,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.173244,0.018654,0.364700,0.018654,0.364700,0.019129,0.363021,0.019853,0.360507,116.68
3,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.265584,0.017037,0.370590,0.017037,0.370590,0.017952,0.367222,0.018891,0.363862,120.22
4,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.369241,0.031970,0.324079,0.031970,0.324079,0.031664,0.324897,0.031056,0.326534,109.86
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9510,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.600413,0.142764,0.150168,0.142764,0.150168,0.110331,0.186698,0.074594,0.237266,21.98
9511,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.720968,0.183310,0.113080,0.183310,0.113080,0.147064,0.145830,0.107072,0.190797,9.60
9512,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.836329,0.187379,0.109785,0.187379,0.109785,0.149545,0.143375,0.107612,0.190112,22.61
9513,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.933029,0.204697,0.096520,0.204697,0.096520,0.160279,0.133135,0.110878,0.186020,21.70


In [53]:
X_train_transformed

Unnamed: 0,onehotencoder__precip_type:idx_0.0,onehotencoder__precip_type:idx_1.0,onehotencoder__precip_type:idx_2.0,onehotencoder__precip_type:idx_3.0,minmaxscaler__precip_1h:mm,minmaxscaler__prob_precip_1h:p,minmaxscaler__clear_sky_rad:W,minmaxscaler__clear_sky_energy_1h:J,minmaxscaler__diffuse_rad:W,minmaxscaler__diffuse_rad_1h:Wh,...,minmaxscaler__sin_sun_azimuth:d,minmaxscaler__cos_sun_azimuth:d,minmaxscaler__sin_wind_dir_2m:d,minmaxscaler__cos_wind_dir_2m:d,minmaxscaler__sin_wind_dir_10m:d,minmaxscaler__cos_wind_dir_10m:d,minmaxscaler__sin_wind_dir_50m:d,minmaxscaler__cos_wind_dir_50m:d,minmaxscaler__sin_wind_dir_100m:d,minmaxscaler__cos_wind_dir_100m:d
0,1.0,0.0,0,0.0,0.0,0.0,0.757424,0.854846,0.781999,0.815432,...,0.307401,0.038568,0.045382,0.291860,0.045382,0.291860,0.042169,0.299026,0.038731,0.307049
1,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.779972,0.000000,0.742604,...,0.205451,0.095955,0.023405,0.348815,0.023405,0.348815,0.023669,0.347984,0.024203,0.346324
2,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.121531,0.173244,0.018654,0.364700,0.018654,0.364700,0.019129,0.363021,0.019853,0.360507
3,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.058350,0.265584,0.017037,0.370590,0.017037,0.370590,0.017952,0.367222,0.018891,0.363862
4,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.017398,0.369241,0.031970,0.324079,0.031970,0.324079,0.031664,0.324897,0.031056,0.326534
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9510,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.010188,0.600413,0.142764,0.150168,0.142764,0.150168,0.110331,0.186698,0.074594,0.237266
9511,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.051479,0.720968,0.183310,0.113080,0.183310,0.113080,0.147064,0.145830,0.107072,0.190797
9512,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.130026,0.836329,0.187379,0.109785,0.187379,0.109785,0.149545,0.143375,0.107612,0.190112
9513,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.250030,0.933029,0.204697,0.096520,0.204697,0.096520,0.160279,0.133135,0.110878,0.186020


### Wind feature importance

In [54]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

In [55]:
XG_X_train_transformed, XG_X_val, XG_y_train, XG_y_val = train_test_split(
X_train_transformed, y_train, test_size = 0.1, random_state = 42  # val = 10%
)

In [56]:
xgb_reg = XGBRegressor(
    max_depth=7,                # Optimal value found
    n_estimators=300,           # Optimal value found
    learning_rate=0.05,         # Optimal value found
    reg_alpha=0.05,             # Optimal value found
    reg_lambda=20,              # Optimal value found
    subsample=0.8,              # Optimal value found
    colsample_bytree=0.8,       # Optimal value found
    objective='reg:squarederror',
    eval_metric="mae",
    random_state=42             # Ensuring reproducibility
)

# Fit the model on the training data
xgb_reg.fit(
    XG_X_train_transformed,
    XG_y_train,
    eval_set=[(XG_X_train_transformed, XG_y_train), (XG_X_val, XG_y_val)],
    verbose=True,
    early_stopping_rounds=5     # Retain early stopping
)

print("➡️  model fitting done")

# Make predictions
y_pred = xgb_reg.predict(X_test_transformed)

print("➡️  performed predictions")

[0]	validation_0-mae:20.80751	validation_1-mae:21.51030
[1]	validation_0-mae:20.20661	validation_1-mae:20.87058
[2]	validation_0-mae:19.58445	validation_1-mae:20.23423
[3]	validation_0-mae:19.01740	validation_1-mae:19.62991
[4]	validation_0-mae:18.48332	validation_1-mae:19.05428
[5]	validation_0-mae:17.98105	validation_1-mae:18.56000
[6]	validation_0-mae:17.51240	validation_1-mae:18.08306
[7]	validation_0-mae:17.08961	validation_1-mae:17.62856
[8]	validation_0-mae:16.67798	validation_1-mae:17.20270




[9]	validation_0-mae:16.30935	validation_1-mae:16.82968
[10]	validation_0-mae:15.97121	validation_1-mae:16.49098
[11]	validation_0-mae:15.64597	validation_1-mae:16.15959
[12]	validation_0-mae:15.32988	validation_1-mae:15.86912
[13]	validation_0-mae:15.06103	validation_1-mae:15.61749
[14]	validation_0-mae:14.79133	validation_1-mae:15.34449
[15]	validation_0-mae:14.53598	validation_1-mae:15.09420
[16]	validation_0-mae:14.29122	validation_1-mae:14.85646
[17]	validation_0-mae:14.05552	validation_1-mae:14.66086
[18]	validation_0-mae:13.84452	validation_1-mae:14.44749
[19]	validation_0-mae:13.65796	validation_1-mae:14.27399
[20]	validation_0-mae:13.46966	validation_1-mae:14.12047
[21]	validation_0-mae:13.30023	validation_1-mae:13.95696
[22]	validation_0-mae:13.14236	validation_1-mae:13.82092
[23]	validation_0-mae:13.01639	validation_1-mae:13.70632
[24]	validation_0-mae:12.87006	validation_1-mae:13.56968
[25]	validation_0-mae:12.73436	validation_1-mae:13.44740
[26]	validation_0-mae:12.61850	v

In [57]:
importance = xgb_reg.feature_importances_
feature_names = X_train_transformed.columns
importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': importance})
importance_df.sort_values(by='Importance', ascending=False, inplace=True)

In [58]:
importance_df

Unnamed: 0,Feature,Importance
36,minmaxscaler__wind_speed_100m:ms,0.161653
32,minmaxscaler__wind_speed_50m:ms,0.098042
40,minmaxscaler__month_cosine,0.083464
52,minmaxscaler__cos_wind_dir_100m:d,0.044203
42,minmaxscaler__season_cosine,0.038545
47,minmaxscaler__sin_wind_dir_10m:d,0.034884
48,minmaxscaler__cos_wind_dir_10m:d,0.032621
25,minmaxscaler__t_10m:C,0.025821
3,onehotencoder__precip_type:idx_3.0,0.025042
45,minmaxscaler__sin_wind_dir_2m:d,0.023791


In [59]:
wind_columns_drop = list(importance_df[importance_df['Importance']<0.02]['Feature'])

In [60]:
df = df.drop(columns = wind_columns_drop).copy()

In [61]:
df

Unnamed: 0,onehotencoder__precip_type:idx_3.0,minmaxscaler__wind_speed_2m:ms,minmaxscaler__t_10m:C,minmaxscaler__wind_speed_50m:ms,minmaxscaler__wind_speed_100m:ms,minmaxscaler__month_cosine,minmaxscaler__season_cosine,minmaxscaler__sin_wind_dir_2m:d,minmaxscaler__sin_wind_dir_10m:d,minmaxscaler__cos_wind_dir_10m:d,minmaxscaler__sin_wind_dir_50m:d,minmaxscaler__cos_wind_dir_50m:d,minmaxscaler__cos_wind_dir_100m:d,wind_production
0,0.0,0.950821,0.527716,0.880799,0.900239,0.933013,1.0,0.045382,0.045382,0.291860,0.042169,0.299026,0.307049,40.59
1,0.0,0.945231,0.525499,0.871100,0.892636,0.933013,1.0,0.023405,0.023405,0.348815,0.023669,0.347984,0.346324,67.86
2,0.0,0.960869,0.529933,0.907344,0.923069,0.933013,1.0,0.018654,0.018654,0.364700,0.019129,0.363021,0.360507,116.68
3,0.0,0.980051,0.527716,0.953354,0.961967,0.933013,1.0,0.017037,0.017037,0.370590,0.017952,0.367222,0.363862,120.22
4,0.0,0.977599,0.501109,0.946671,0.956468,0.933013,1.0,0.031970,0.031970,0.324079,0.031664,0.324897,0.326534,109.86
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9510,0.0,0.908292,0.263858,0.769551,0.793619,0.933013,1.0,0.142764,0.142764,0.150168,0.110331,0.186698,0.237266,21.98
9511,0.0,0.908292,0.252772,0.773650,0.799975,0.933013,1.0,0.183310,0.183310,0.113080,0.147064,0.145830,0.190797,9.60
9512,0.0,0.905022,0.248337,0.765366,0.793619,0.933013,1.0,0.187379,0.187379,0.109785,0.149545,0.143375,0.190112,22.61
9513,0.0,0.901599,0.243902,0.756721,0.787048,0.933013,1.0,0.204697,0.204697,0.096520,0.160279,0.133135,0.186020,21.70


## Prepare the dataset

### Folds

In [308]:
# --------------------------------------------------- #
# Let's consider FOLDS with a length of 4 months      #
# (3 years will be used for train, 1 for test!)       #
# --------------------------------------------------- #

FOLD_LENGTH = 24 * 7 * 16                             # 24 times a day * for 7 days * for 16 weeks


# --------------------------------------------------- #
# Let's consider FOLDS starting every 2 weeks         #
# --------------------------------------------------- #

FOLD_STRIDE = 24 * 7 * 2                              # 24 times a day for 2 weeks


# --------------------------------------------------- #
# Let's consider a train-test-split ratio of 2/3      #
# --------------------------------------------------- #

TRAIN_VAL_RATIO = 0.75                               # 3 month train, 1 month test

In [None]:
def get_folds(
    df: pd.DataFrame,
    fold_length: int,
    fold_stride: int) -> List[pd.DataFrame]:
    '''
    This function slides through the Time Series dataframe of shape (n_timesteps, n_features) to create folds
    - of equal `fold_length`
    - using `fold_stride` between each fold

    Returns a list of folds, each as a DataFrame
    '''
    # YOUR CODE HERE
    folds = []
    for idx in range(0, len(df)-fold_length, fold_stride): # --> also possible to get rid of the break
    #for idx in range(0, len(df), fold_stride):   # range(start, stop, step): for each idx in our rows at every 91 days
        # Exits the loop as soon as the last fold index would exceed the last index
        #if (idx + fold_length) > len(df):
            #break
        fold = df.iloc[idx:idx + fold_length, :]  # select from row idx til last row of the fold (3 years), all the columns
        folds.append(fold)   # append the 3 year fold to folds
    return folds

### Train-Val split

In [None]:
def train_val_split(fold:pd.DataFrame,
                     train_val_ratio: float,
                     input_length: int) -> Tuple[pd.DataFrame]:
    '''
    Returns a train dataframe and a test dataframe (fold_train, fold_test)
    from which one can sample (X,y) sequences.
    df_train should contain all the timesteps until round(train_test_ratio * len(fold))
    '''
    # YOUR CODE HERE
    # TRAIN SET
    # ======================
    last_train_idx = round(train_val_ratio * len(fold))  # 0.66 * number of rows in the fold (66% of the fold for train)
    fold_train = fold.iloc[0:last_train_idx, :]   # 1st until last row of train set, all columns

    # TEST SET
    # ======================
    first_val_idx = last_train_idx - input_length  # last row of train set - 2 weeks --> test set starts 2 weeks
                                                                    # before train set ends --> overlap (not a problem with X)
    fold_val = fold.iloc[first_val_idx:, :]   # 1st until last row of val set, all columns

    return (fold_train, fold_val)

In [None]:
(fold_train, fold_val) = train_val_split(folds[0], TRAIN_VAL_RATIO, INPUT_LENGTH)

In [None]:
#Five days as input & output length
INPUT_LENGTH = 24 * 5 # records every hour x 24 hours
                      # for 5 days
OUTPUT_LENGTH = 12

## Model

### Prepare dataset for model training

#### Train-Val split

In [62]:
def train_val_split(df:pd.DataFrame,
                     train_val_ratio: float,
                     input_length: int) -> Tuple[pd.DataFrame]:
    '''
    Returns a train dataframe and a test dataframe (fold_train, fold_test)
    from which one can sample (X,y) sequences.
    df_train should contain all the timesteps until round(train_test_ratio * len(fold))
    '''
    # YOUR CODE HERE
    # TRAIN SET
    # ======================
    last_train_idx = round(train_val_ratio * len(df))  # 0.66 * number of rows in the fold (66% of the fold for train)
    fold_train = df.iloc[0:last_train_idx, :]   # 1st until last row of train set, all columns

    # TEST SET
    # ======================
    first_val_idx = last_train_idx - input_length  # last row of train set - 2 weeks --> test set starts 2 weeks
                                                                    # before train set ends --> overlap (not a problem with X)
    fold_val = df.iloc[first_val_idx:, :]   # 1st until last row of val set, all columns

    return (fold_train, fold_val)

#### Sequences

In [63]:
#Five days as input & output length
INPUT_LENGTH = 24 * 5 # records every hour x 24 hours
                      # for 5 days
OUTPUT_LENGTH = 24

In [64]:
def get_Xi_yi(
    df:pd.DataFrame,
    input_length:int,  # 120
    output_length:int):  # 120
    '''
    - given a fold, it returns one sequence (X_i, y_i)
    - with the starting point of the sequence being chosen at random
    '''
    # YOUR CODE
    first_possible_start = 0                                    # the +1 accounts for the index, that is exclusive.
    last_possible_start = len(df) - (input_length + output_length) + 1    # It can start as long as there are still
                                                                             # 120 + 1 days after the 1st day.
    random_start = np.random.randint(first_possible_start, last_possible_start)  # np.random to pick a day inside
                                                                                    # the possible interval.
    X_i = df.iloc[random_start:random_start+input_length]

    y_i = df.iloc[random_start+input_length:
                  random_start+input_length+output_length][TARGET]  # creates a pd.DataFrame for the target y

    return (X_i, y_i)

In [65]:
TRAIN_VAL_RATIO = 0.75
NUMBER_OF_SEQUENCES_TRAIN = int(3000 * 0.75)
NUMBER_OF_SEQUENCES_VAL = int(3000 * 0.25)

In [66]:
def get_X_y(
    df:pd.DataFrame,
    number_of_sequences:int,
    input_length:int,
    output_length:int
):
    # YOUR CODE HERE
    X, y = [], []  # lists for the sequences for X and y

    for i in range(number_of_sequences):
        (Xi, yi) = get_Xi_yi(df, input_length, output_length)   # calls the previous function to generate sequences X + y
        X.append(Xi)
        y.append(yi)

    return np.array(X), np.array(y)

In [67]:
df_train, df_val = train_val_split(df, TRAIN_VAL_RATIO, INPUT_LENGTH)
X_train, y_train = get_X_y(df_train, NUMBER_OF_SEQUENCES_TRAIN, INPUT_LENGTH, OUTPUT_LENGTH)
X_val, y_val = get_X_y(df_val, NUMBER_OF_SEQUENCES_VAL, INPUT_LENGTH, OUTPUT_LENGTH)


#Dropping the targets from the X (we dont want to train the model on the targets)
X_train = X_train[:, :, :-1]
X_val = X_val[:, :, :-1]

##### Show data

In [68]:
#Training set (splits the whole dataset into training and val set)
df_train.shape

(7136, 14)

In [69]:
#Validation set
df_val.shape

(2499, 14)

In [70]:
#Sequences of features of df_train (Splits the training set into smaller sequences with each shape of (120, 53))
# Train model on these features -> 53 features with 120 past hours
X_train.shape

(2250, 120, 13)

In [71]:
y_train = np.expand_dims(y_train, axis=-1)

In [72]:
#Sequences of corresponding targets of df_train (Splits the training set into smaller sequences with each shape of (120, 3))
# Train model on these targets -> 3 targets with 12 next hours
y_train.shape

(2250, 24, 1)

In [73]:
#Sequences of features of df_val (Splits the val set into 750 sequences with each shape of (120, 53))
X_val.shape

(750, 120, 13)

In [74]:
y_val = np.expand_dims(y_val, axis=-1)

In [75]:
#Sequences of corresponding targets of df_val (Splits the val set into 750 sequences with each shape of (120, 3))
y_val.shape

(750, 24, 1)

In [76]:
X_train.shape[1:]

(120, 13)

##### How do the dfs look like?

In [None]:
df_train

Unnamed: 0,minmaxscaler__hour_sine,minmaxscaler__hour_cosine,minmaxscaler__month_sine,minmaxscaler__month_cosine,minmaxscaler__season_sine,minmaxscaler__season_cosine,minmaxscaler__precip_1h:mm,minmaxscaler__prob_precip_1h:p,minmaxscaler__clear_sky_rad:W,minmaxscaler__clear_sky_energy_1h:J,...,robustscaler__wind_speed_10m:ms,robustscaler__wind_speed_50m:ms,robustscaler__wind_speed_100m:ms,onehotencoder__precip_type:idx_0.0,onehotencoder__precip_type:idx_1.0,onehotencoder__precip_type:idx_2.0,onehotencoder__precip_type:idx_3.0,pv_production,wind_production,consumption
0,0.370590,0.017037,0.750000,0.933013,0.5,1.0,0.0,0.0,0.757424,0.854846,...,0.912001,0.919976,0.936622,1.0,0.0,0.0,0.0,0.0,40.59,26.514689
1,0.250000,0.066987,0.750000,0.933013,0.5,1.0,0.0,0.0,0.000000,0.779972,...,0.845489,0.859477,0.887809,1.0,0.0,0.0,0.0,0.0,67.86,28.326960
2,0.146447,0.146447,0.750000,0.933013,0.5,1.0,0.0,0.0,0.000000,0.000000,...,1.111203,1.085550,1.083210,1.0,0.0,0.0,0.0,0.0,116.68,23.682207
3,0.066987,0.250000,0.750000,0.933013,0.5,1.0,0.0,0.0,0.000000,0.000000,...,1.434093,1.372530,1.332958,1.0,0.0,0.0,0.0,0.0,120.22,25.354782
4,0.017037,0.370590,0.750000,0.933013,0.5,1.0,0.0,0.0,0.000000,0.000000,...,1.375834,1.330847,1.297649,1.0,0.0,0.0,0.0,0.0,109.86,23.861942
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7131,0.066987,0.250000,0.066987,0.750000,0.0,0.5,0.0,0.0,0.000000,0.789028,...,0.086402,0.000000,-0.105984,1.0,0.0,0.0,0.0,0.0,-0.33,22.159622
7132,0.017037,0.370590,0.066987,0.750000,0.0,0.5,0.0,0.0,0.000000,0.000000,...,-0.246347,-0.316057,-0.383700,1.0,0.0,0.0,0.0,0.0,-0.34,24.798298
7133,0.000000,0.500000,0.066987,0.750000,0.0,0.5,0.0,0.0,0.000000,0.000000,...,-0.045545,-0.130643,-0.252573,1.0,0.0,0.0,0.0,0.0,-0.36,30.817582
7134,0.017037,0.629410,0.066987,0.750000,0.0,0.5,0.0,0.0,0.000000,0.000000,...,-0.302016,-0.398752,-0.491991,1.0,0.0,0.0,0.0,0.0,-0.30,26.371867


In [None]:
df_val

Unnamed: 0,minmaxscaler__hour_sine,minmaxscaler__hour_cosine,minmaxscaler__month_sine,minmaxscaler__month_cosine,minmaxscaler__season_sine,minmaxscaler__season_cosine,minmaxscaler__precip_1h:mm,minmaxscaler__prob_precip_1h:p,minmaxscaler__clear_sky_rad:W,minmaxscaler__clear_sky_energy_1h:J,...,robustscaler__wind_speed_10m:ms,robustscaler__wind_speed_50m:ms,robustscaler__wind_speed_100m:ms,onehotencoder__precip_type:idx_0.0,onehotencoder__precip_type:idx_1.0,onehotencoder__precip_type:idx_2.0,onehotencoder__precip_type:idx_3.0,pv_production,wind_production,consumption
7016,0.146447,0.853553,0.066987,0.750000,0.0,0.5,0.0,0.0,0.0,0.0,...,-0.624904,-0.741295,-0.941294,1.0,0.0,0.0,0.0,0.0,9.84,27.835067
7017,0.250000,0.933013,0.066987,0.750000,0.0,0.5,0.0,0.0,0.0,0.0,...,-0.624904,-0.686177,-0.837521,1.0,0.0,0.0,0.0,0.0,12.59,19.498231
7018,0.370590,0.982963,0.066987,0.750000,0.0,0.5,0.0,0.0,0.0,0.0,...,-0.485613,-0.487358,-0.454837,1.0,0.0,0.0,0.0,0.0,22.86,16.549779
7019,0.500000,1.000000,0.066987,0.750000,0.0,0.5,0.0,0.0,0.0,0.0,...,-0.360275,-0.276690,-0.221841,1.0,0.0,0.0,0.0,0.0,6.29,20.053793
7020,0.629410,0.982963,0.066987,0.750000,0.0,0.5,0.0,0.0,0.0,0.0,...,-0.360275,-0.316057,-0.252573,1.0,0.0,0.0,0.0,0.0,8.44,20.168304
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9510,0.017037,0.629410,0.750000,0.933013,0.5,1.0,0.0,0.0,0.0,0.0,...,0.205763,0.226072,0.252054,1.0,0.0,0.0,0.0,0.0,21.98,44.422658
9511,0.066987,0.750000,0.750000,0.933013,0.5,1.0,0.0,0.0,0.0,0.0,...,0.205763,0.251639,0.292863,1.0,0.0,0.0,0.0,0.0,9.60,45.167707
9512,0.146447,0.853553,0.750000,0.933013,0.5,1.0,0.0,0.0,0.0,0.0,...,0.127466,0.199967,0.252054,1.0,0.0,0.0,0.0,0.0,22.61,32.476198
9513,0.250000,0.933013,0.750000,0.933013,0.5,1.0,0.0,0.0,0.0,0.0,...,0.086402,0.146047,0.209862,1.0,0.0,0.0,0.0,0.0,21.70,28.561791


In [None]:
X_train

array([[[0.14644661, 0.85355339, 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.25      , 0.9330127 , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.37059048, 0.98296291, 0.        , ..., 0.        ,
         0.        , 0.        ],
        ...,
        [0.        , 0.5       , 0.0669873 , ..., 0.        ,
         0.        , 0.        ],
        [0.01703709, 0.62940952, 0.0669873 , ..., 0.        ,
         0.        , 0.        ],
        [0.0669873 , 0.75      , 0.0669873 , ..., 0.        ,
         0.        , 0.        ]],

       [[0.01703709, 0.37059048, 0.9330127 , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.5       , 0.9330127 , ..., 0.        ,
         0.        , 0.        ],
        [0.01703709, 0.62940952, 0.9330127 , ..., 0.        ,
         0.        , 0.        ],
        ...,
        [0.25      , 0.0669873 , 1.        , ..., 0.        ,
         0.        , 0.        ],
        [0.1

In [None]:
y_train

array([[[ 0.00000000e+00, -2.50000000e-01,  2.01589867e+01],
        [ 0.00000000e+00, -2.40000000e-01,  1.14855933e+01],
        [ 0.00000000e+00, -2.40000000e-01,  7.91753500e+00],
        ...,
        [ 4.69658320e+00,  3.20700000e+01,  1.91948489e+01],
        [ 2.56067500e+01,  6.61500000e+01,  1.90871978e+01],
        [ 4.03999160e+01,  4.54600000e+01,  1.92594511e+01]],

       [[ 0.00000000e+00,  7.78800000e+01,  2.23920422e+01],
        [ 0.00000000e+00,  9.82200000e+01,  5.81534222e+01],
        [ 0.00000000e+00,  8.96200000e+01,  4.77380222e+01],
        ...,
        [ 0.00000000e+00,  2.49700000e+01,  2.66239356e+01],
        [ 0.00000000e+00,  1.94200000e+01,  2.61866644e+01],
        [ 0.00000000e+00,  1.48300000e+01,  2.51457422e+01]],

       [[ 4.50604170e+01, -0.00000000e+00,  1.58383333e+01],
        [ 5.72908340e+01, -0.00000000e+00,  1.24288600e+01],
        [ 6.60800840e+01, -0.00000000e+00,  1.05990067e+01],
        ...,
        [ 2.58024170e+01, -0.00000000e+00,

In [None]:
X_val

array([[[0.62940952, 0.98296291, 0.5       , ..., 0.        ,
         0.        , 0.        ],
        [0.75      , 0.9330127 , 0.5       , ..., 0.        ,
         0.        , 0.        ],
        [0.85355339, 0.85355339, 0.5       , ..., 0.        ,
         0.        , 0.        ],
        ...,
        [0.25      , 0.9330127 , 0.5       , ..., 0.        ,
         0.        , 0.        ],
        [0.37059048, 0.98296291, 0.5       , ..., 0.        ,
         0.        , 0.        ],
        [0.5       , 1.        , 0.5       , ..., 0.        ,
         0.        , 0.        ]],

       [[0.25      , 0.9330127 , 0.25      , ..., 0.        ,
         0.        , 0.        ],
        [0.37059048, 0.98296291, 0.25      , ..., 0.        ,
         0.        , 0.        ],
        [0.5       , 1.        , 0.25      , ..., 0.        ,
         0.        , 0.        ],
        ...,
        [0.01703709, 0.62940952, 0.25      , ..., 0.        ,
         0.        , 0.        ],
        [0.0

In [None]:
y_val

array([[[ 0.00000000e+00,  2.06500000e+01,  2.00543022e+01],
        [ 0.00000000e+00,  2.91900000e+01,  1.78826067e+01],
        [ 0.00000000e+00,  2.46700000e+01,  1.98055756e+01],
        ...,
        [ 9.19167000e-02,  9.28000000e+00,  1.69353156e+01],
        [ 3.33340000e-03,  8.01000000e+00,  1.75387989e+01],
        [ 1.64083300e-01,  5.83000000e+00,  1.92496711e+01]],

       [[ 0.00000000e+00, -3.10000000e-01,  1.74297622e+01],
        [ 0.00000000e+00,  2.95000000e+00,  1.73931400e+01],
        [ 0.00000000e+00,  2.38000000e+00,  1.73617000e+01],
        ...,
        [ 3.78182000e-02,  1.31200000e+01,  2.70674578e+01],
        [ 6.45750000e-01,  5.44000000e+00,  2.97449111e+01],
        [ 2.51175000e+00,  1.74000000e+00,  2.53388800e+01]],

       [[ 0.00000000e+00, -4.00000000e-01,  2.71397933e+01],
        [ 0.00000000e+00, -3.60000000e-01,  2.88521733e+01],
        [ 3.37583400e-01, -4.70000000e-01,  2.85557200e+01],
        ...,
        [ 0.00000000e+00,  1.41900000e+01,

### Train model on mini sequences

In [254]:
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers, metrics
from tensorflow.keras.regularizers import L1L2

def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layer
    model.add(layers.LSTM(64,
                          activation='tanh',
                          return_sequences = True,
                          input_shape=(120, 53)

                         ))

    model.add(layers.LSTM(units=32, activation='tanh'))

    ## 1.2 Hidden layer
    model.add(layers.Dense(10, activation="linear"))

    ## 1.2 - Predictive Dense Layers
    output_length = y_train.shape[1]
    model.add(layers.Dense(output_length*3, activation='linear'))  # Flatten the output
    model.add(layers.Reshape((output_length, 3)))  # Reshape to (12, 3)


    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse', optimizer=adam, metrics=["mae"])

    return model

2024-11-28 17:17:08.838379: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [38]:
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers, metrics
from tensorflow.keras.regularizers import L1L2

def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layers
    model.add(layers.LSTM(64,
                          activation='tanh',
                          return_sequences=True,  # Keep output for each time step
                          input_shape=(120, 53)))

    model.add(layers.LSTM(32,
                          activation='tanh',
                          return_sequences=True))  # Keep the sequence dimension

    ## 1.2 - Hidden Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(64, activation="relu")))  # Apply to each time step

    ## 1.3 - Predictive Dense Layer
    output_length = y_train.shape[1]  # 12 future steps
    num_targets = y_train.shape[2]    # 3 targets
    model.add(layers.TimeDistributed(layers.Dense(num_targets, activation='linear')))  # Directly output 3 targets

    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse', optimizer=adam, metrics=["mae"])

    return model


In [57]:
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras import regularizers


def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layers
    model.add(layers.LSTM(32,
                          activation='tanh',
                          return_sequences=True,
                          input_shape=(120, 53),
                          kernel_regularizer=regularizers.l2(0.01)))

    model.add(layers.Dropout(0.3))

    model.add(layers.LSTM(16, activation='tanh', return_sequences=True))

    ## 1.2 - Slice the output to focus only on the last 12 time steps
    model.add(layers.Lambda(lambda x: x[:, -12:, :]))  # Keep only the last 12 time steps

    ## 1.3 - Hidden Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(64, activation="relu")))

    ## 1.4 - Predictive Dense Layer
    num_targets = y_train.shape[2]    # 3 targets
    model.add(layers.TimeDistributed(layers.Dense(num_targets, activation='linear')))

    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=[
                      MeanAbsoluteError(name="mae_target_1"),
                      MeanAbsoluteError(name="mae_target_2"),
                      MeanAbsoluteError(name="mae_target_3")
                  ])


    return model


In [45]:
model = init_model(X_train, y_train)
model.summary()

In [52]:
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from typing import Tuple

def fit_model(model: tf.keras.Model, verbose=1) -> Tuple[tf.keras.Model, dict]:

    es = EarlyStopping(monitor = "val_loss",
                      patience = 15,
                      mode = "min",
                      restore_best_weights = True)


    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val),
                        shuffle = False,
                        batch_size = 32,
                        epochs = 500,
                        callbacks = [es],
                        verbose = verbose)

    return model, history

In [48]:
model = init_model(X_train, y_train)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

  super().__init__(**kwargs)


Epoch 1/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 762.2496 - mae: 15.8275 - val_loss: 529.4869 - val_mae: 13.9109
Epoch 2/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 580.1478 - mae: 14.6266 - val_loss: 532.7062 - val_mae: 14.1112
Epoch 3/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 579.7707 - mae: 14.6867 - val_loss: 527.8139 - val_mae: 14.0395
Epoch 4/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 545.6751 - mae: 13.9649 - val_loss: 533.9302 - val_mae: 14.9059
Epoch 5/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 480.5840 - mae: 13.7800 - val_loss: 520.7446 - val_mae: 14.4100
Epoch 6/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 449.5996 - mae: 12.9849 - val_loss: 520.3669 - val_mae: 14.0889
Epoch 7/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━

In [58]:
model = init_model(X_train, y_train)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

Epoch 1/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 39ms/step - loss: 718.1918 - mae_target_1: 15.4905 - mae_target_2: 15.4905 - mae_target_3: 15.4905 - val_loss: 538.4426 - val_mae_target_1: 14.3683 - val_mae_target_2: 14.3683 - val_mae_target_3: 14.3683
Epoch 2/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 581.2307 - mae_target_1: 14.7646 - mae_target_2: 14.7646 - mae_target_3: 14.7646 - val_loss: 532.6366 - val_mae_target_1: 14.0993 - val_mae_target_2: 14.0993 - val_mae_target_3: 14.0993
Epoch 3/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 572.0665 - mae_target_1: 14.5059 - mae_target_2: 14.5059 - mae_target_3: 14.5059 - val_loss: 602.0637 - val_mae_target_1: 15.8317 - val_mae_target_2: 15.8317 - val_mae_target_3: 15.8317
Epoch 4/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 507.0397 - mae_target_1: 13.7038 - mae_target_2: 13.7038 - mae_tar

#### One model for each feature

##### PV

In [59]:
y_train.shape

(2250, 12, 3)

In [61]:
y_train[:, :, 0].shape

(2250, 12)

In [143]:
y_train_pv = y_train[:, :, 0]

In [144]:
y_train_pv = np.expand_dims(y_train_pv, axis=-1)

In [145]:
y_train_pv.shape

(2250, 12, 1)

In [146]:
y_val_pv = y_val[:,:,0]

In [147]:
y_val_pv = np.expand_dims(y_val_pv, axis=-1)

In [148]:
y_val_pv.shape

(750, 12, 1)

In [149]:
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras import regularizers


def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layers
    model.add(layers.LSTM(32,
                          activation='tanh',
                          return_sequences=True,
                          input_shape=(120, 53),
                          kernel_regularizer=regularizers.l2(0.01)))

    model.add(layers.Dropout(0.3))

    model.add(layers.LSTM(16, activation='tanh', return_sequences=True))

    ## 1.2 - Slice the output to focus only on the last 12 time steps
    model.add(layers.Lambda(lambda x: x[:, -12:, :]))  # Keep only the last 12 time steps

    ## 1.3 - Hidden Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(64, activation="relu")))

    ## 1.4 - Predictive Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(1, activation='linear')))

    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=['mae'
                  ])


    return model

In [69]:
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from typing import Tuple

def fit_model(model: tf.keras.Model, verbose=1) -> Tuple[tf.keras.Model, dict]:

    es = EarlyStopping(monitor = "val_loss",
                      patience = 15,
                      mode = "min",
                      restore_best_weights = True)


    history = model.fit(X_train, y_train_pv,
                        validation_data=(X_val, y_val_pv),
                        shuffle = False,
                        batch_size = 32,
                        epochs = 500,
                        callbacks = [es],
                        verbose = verbose)

    return model, history

In [70]:
model = init_model(X_train, y_train_pv)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

  super().__init__(**kwargs)


Epoch 1/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - loss: 318.3997 - mae: 10.8458 - val_loss: 51.8289 - val_mae: 5.9115
Epoch 2/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 126.6924 - mae: 6.5357 - val_loss: 72.2632 - val_mae: 4.5350
Epoch 3/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 106.6972 - mae: 5.7288 - val_loss: 58.6006 - val_mae: 4.0575
Epoch 4/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 96.8080 - mae: 5.3673 - val_loss: 39.0254 - val_mae: 2.6443
Epoch 5/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 91.0463 - mae: 5.0390 - val_loss: 30.5442 - val_mae: 2.4074
Epoch 6/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 84.7698 - mae: 4.7915 - val_loss: 43.1633 - val_mae: 3.0513
Epoch 7/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s

## Wind

In [256]:
y_train.shape

(2250, 12, 1)

In [83]:
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras import regularizers
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers, metrics
from tensorflow.keras.regularizers import L1L2

input_shape = X_train.shape[1:]
def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layers
    model.add(layers.LSTM(32,
                          activation='tanh',
                          return_sequences=True,
                          input_shape=input_shape,
                          #kernel_regularizer=regularizers.l2(0.01)
                         )
             )

    #model.add(layers.Dropout(0.3))

    model.add(layers.LSTM(16, activation='tanh', return_sequences=True))

    ## 1.2 - Slice the output to focus only on the last 12 time steps
    model.add(layers.Lambda(lambda x: x[:, -24:, :]))  # Keep only the last 12 time steps

    ## 1.3 - Hidden Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(64, activation="relu")))

    ## 1.4 - Predictive Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(1, activation='linear')))

    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.003)
    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=['mae'
                  ])


    return model

In [84]:
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from typing import Tuple

def fit_model(model: tf.keras.Model, verbose=1) -> Tuple[tf.keras.Model, dict]:

    es = EarlyStopping(monitor = "val_loss",
                      patience = 15,
                      mode = "min",
                      restore_best_weights = True)


    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val),
                        shuffle = False,
                        batch_size = 32,
                        epochs = 500,
                        callbacks = [es],
                        verbose = verbose)

    return model, history

In [85]:
model = init_model(X_train, y_train)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 120, 32)           5888      
                                                                 
 lstm_7 (LSTM)               (None, 120, 16)           3136      
                                                                 
 lambda_3 (Lambda)           (None, 24, 16)            0         
                                                                 
 time_distributed_6 (TimeDis  (None, 24, 64)           1088      
 tributed)                                                       
                                                                 
 time_distributed_7 (TimeDis  (None, 24, 1)            65        
 tributed)                                                       
                                                                 
Total params: 10,177
Trainable params: 10,177
Non-trai

In [86]:
model.pred

array([[[0.00000000e+00, 9.05021637e-01, 3.56984479e-01, ...,
         5.71314467e-01, 5.11188844e-03, 1.34277059e-03],
        [0.00000000e+00, 8.94235970e-01, 3.54767184e-01, ...,
         6.15024869e-01, 1.34105738e-02, 3.51727005e-03],
        [0.00000000e+00, 8.81592900e-01, 3.48115299e-01, ...,
         7.55271459e-01, 7.00739176e-02, 2.05902737e-02],
        ...,
        [0.00000000e+00, 9.84723849e-01, 5.89800443e-01, ...,
         1.07455074e-02, 3.96898210e-01, 3.97754700e-01],
        [0.00000000e+00, 9.88043385e-01, 5.76496674e-01, ...,
         5.49206832e-03, 4.26095619e-01, 4.26098215e-01],
        [0.00000000e+00, 9.90176016e-01, 5.47671840e-01, ...,
         8.85637464e-03, 4.06309652e-01, 4.11460450e-01]],

       [[0.00000000e+00, 8.98009634e-01, 5.36585366e-01, ...,
         2.68351982e-01, 5.68982537e-02, 8.45083446e-02],
        [0.00000000e+00, 8.98009634e-01, 5.36585366e-01, ...,
         2.62187895e-01, 6.01757594e-02, 8.99246784e-02],
        [0.00000000e+00, 

##### Consumption

In [154]:
y_train_cons = y_train[:, :, 1]
y_train_cons = np.expand_dims(y_train_cons, axis=-1)
y_train_cons.shape

(2250, 12, 1)

In [155]:
y_val_cons = y_val[:,:,1]
y_val_cons = np.expand_dims(y_val_cons, axis=-1)
y_val_cons.shape

(750, 12, 1)

In [None]:
def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layers
    model.add(layers.LSTM(32,
                          activation='tanh',
                          return_sequences=True,
                          input_shape=(120, 53),
                          kernel_regularizer=regularizers.l2(0.01)))

    model.add(layers.Dropout(0.3))

    model.add(layers.LSTM(16, activation='tanh', return_sequences=True))

    ## 1.2 - Slice the output to focus only on the last 12 time steps
    model.add(layers.Lambda(lambda x: x[:, -12:, :]))  # Keep only the last 12 time steps

    ## 1.3 - Hidden Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(64, activation="relu")))

    ## 1.4 - Predictive Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(1, activation='linear')))

    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=['mae'
                  ])


    return model

In [156]:
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from typing import Tuple

def fit_model(model: tf.keras.Model, verbose=1) -> Tuple[tf.keras.Model, dict]:

    es = EarlyStopping(monitor = "val_loss",
                      patience = 30,
                      mode = "min",
                      restore_best_weights = True)


    history = model.fit(X_train, y_train_cons,
                        validation_data=(X_val, y_val_cons),
                        shuffle = False,
                        batch_size = 32,
                        epochs = 500,
                        callbacks = [es],
                        verbose = verbose)

    return model, history

In [157]:
model = init_model(X_train, y_train_cons)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

  super().__init__(**kwargs)


Epoch 1/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step - loss: 1542.7013 - mae: 22.4017 - val_loss: 1269.6810 - val_mae: 24.6305
Epoch 2/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - loss: 1330.2798 - mae: 25.2733 - val_loss: 1463.0802 - val_mae: 29.7571
Epoch 3/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 1134.5842 - mae: 22.0214 - val_loss: 1483.1251 - val_mae: 29.8648
Epoch 4/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - loss: 1104.0150 - mae: 22.0146 - val_loss: 1504.8530 - val_mae: 30.1231
Epoch 5/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 1089.6804 - mae: 21.8196 - val_loss: 1477.6597 - val_mae: 28.3208
Epoch 6/500
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - loss: 1051.7051 - mae: 20.9546 - val_loss: 1654.2979 - val_mae: 31.3178
Epoch 7/500
[1m71/71[0m [32m━━━━━━━━━

In [35]:
X_train_transformed.columns

Index(['minmaxscaler__hour_sine', 'minmaxscaler__hour_cosine',
       'minmaxscaler__month_sine', 'minmaxscaler__month_cosine',
       'minmaxscaler__season_sine', 'minmaxscaler__season_cosine',
       'minmaxscaler__precip_1h:mm', 'minmaxscaler__prob_precip_1h:p',
       'minmaxscaler__clear_sky_rad:W', 'minmaxscaler__clear_sky_energy_1h:J',
       'minmaxscaler__diffuse_rad:W', 'minmaxscaler__diffuse_rad_1h:Wh',
       'minmaxscaler__direct_rad:W', 'minmaxscaler__direct_rad_1h:Wh',
       'minmaxscaler__global_rad:W', 'minmaxscaler__global_rad_1h:Wh',
       'minmaxscaler__sunshine_duration_1h:min',
       'minmaxscaler__low_cloud_cover:p', 'minmaxscaler__medium_cloud_cover:p',
       'minmaxscaler__high_cloud_cover:p', 'minmaxscaler__total_cloud_cover:p',
       'minmaxscaler__effective_cloud_cover:p',
       'minmaxscaler__sin_sun_azimuth:d', 'minmaxscaler__cos_sun_azimuth:d',
       'minmaxscaler__sin_wind_dir_2m:d', 'minmaxscaler__cos_wind_dir_2m:d',
       'minmaxscaler__sin_win

## Cross Validation

### Sequences

In [None]:
def get_Xi_yi(
    df:pd.DataFrame,
    input_length:int,  # 120
    output_length:int):  # 120
    '''
    - given a fold, it returns one sequence (X_i, y_i)
    - with the starting point of the sequence being chosen at random
    '''
    # YOUR CODE
    first_possible_start = 0                                    # the +1 accounts for the index, that is exclusive.
    last_possible_start = len(df) - (input_length + output_length) + 1    # It can start as long as there are still
                                                                             # 120 + 1 days after the 1st day.
    random_start = np.random.randint(first_possible_start, last_possible_start)  # np.random to pick a day inside
                                                                                    # the possible interval.
    X_i = df.iloc[random_start:random_start+input_length]

    y_i = df.iloc[random_start+input_length:
                  random_start+input_length+output_length][TARGET]  # creates a pd.DataFrame for the target y

    return (X_i, y_i)

In [None]:
#Testing if the function works
Xi, Yi = get_Xi_yi(
    fold=folds[0],
    input_length=INPUT_LENGTH,
    output_length=OUTPUT_LENGTH)

In [None]:
Xi.shape

(120, 53)

In [None]:
### In the recap they choose a number which is bigger than the number of rows in the fold -> does that mean there are duplications???!!!
NUMBER_OF_SEQUENCES_TRAIN = (FOLD_LENGTH - (INPUT_LENGTH + OUTPUT_LENGTH))
NUMBER_OF_SEQUENCES_VAL = (FOLD_LENGTH - (INPUT_LENGTH + OUTPUT_LENGTH))

In [None]:
NUMBER_OF_SEQUENCES_TRAIN = len(df) - (INPUT_LENGTH + OUTPUT_LENGTH)

In [None]:
NUMBER_OF_SEQUENCES_TRAIN

9383

In [None]:
def get_X_y(
    df:pd.DataFrame,
    number_of_sequences:int,
    input_length:int,
    output_length:int
):
    # YOUR CODE HERE
    X, y = [], []  # lists for the sequences for X and y

    for i in range(number_of_sequences):
        (Xi, yi) = get_Xi_yi(df, input_length, output_length)   # calls the previous function to generate sequences X + y
        X.append(Xi)
        y.append(yi)

    return np.array(X), np.array(y)

In [None]:
seq = get_X_y(df, NUMBER_OF_SEQUENCES_TRAIN, INPUT_LENGTH, OUTPUT_LENGTH)

In [None]:
seq[1].shape

(9383, 12, 3)

In [None]:
X_train.shape

(2556, 120, 53)

In [None]:
y_train.shape

(2556, 12, 3)

In [None]:
y_temp = y_train[:,:,0]

In [None]:
y_temp = np.expand_dims(y_temp, axis=-1)

In [None]:
y_temp.shape

(2556, 12, 1)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

def cross_validate_baseline_and_lstm():
    '''
    This function cross-validates
    - the "last seen value" baseline model
    - the RNN model
    '''

    list_of_mae_baseline_model = []
    list_of_mae_recurrent_model = []

    # 0 - Creating folds
    # =========================================
    folds = get_folds(df, FOLD_LENGTH, FOLD_STRIDE)  # function we coded to get the folds

    for fold_id, fold in enumerate(folds):

        # 1 - Train/val split the current fold
        # =========================================
        (fold_train, fold_val) = train_val_split(fold, TRAIN_VAL_RATIO, INPUT_LENGTH) # function we coded to split train/val

        X_train, y_train = get_X_y(fold_train, NUMBER_OF_SEQUENCES_TRAIN, INPUT_LENGTH, OUTPUT_LENGTH)  # function we coded to get multiple
        X_val, y_val = get_X_y(fold_val, NUMBER_OF_SEQUENCES_TRAIN, INPUT_LENGTH, OUTPUT_LENGTH)       # sequences from a fold

        # 2 - Modelling
        # =========================================

        ##### Baseline Model
        baseline_model = init_baseline()
        mae_baseline = baseline_model.evaluate(X_val, y_val, verbose=0)[1]   # evaluating baseline model (metric)
        list_of_mae_baseline_model.append(mae_baseline)
        print("-"*50)
        print(f"MAE baseline fold n°{fold_id} = {round(mae_baseline, 2)}")

        ##### LSTM Model
        model = init_model(X_train, y_train)
        es = EarlyStopping(monitor = "val_mae",
                           mode = "min",
                           patience = 3,
                           restore_best_weights = True)

        history = model.fit(X_train, y_train,
                            validation_split = 0.3,
                            shuffle = False,
                            batch_size = 32,
                            epochs = 50,
                            callbacks = [es],
                            verbose = 0)
        res = model.evaluate(X_val, y_val, verbose=0)    # evaluating LSTM (metric)
        mae_lstm = res[1]
        list_of_mae_recurrent_model.append(mae_lstm)
        print(f"MAE LSTM fold n°{fold_id} = {round(mae_lstm, 2)}")

        ##### Comparison LSTM vs Baseline for the current fold
        print(f"🏋🏽‍♂️ improvement over baseline: {round((1 - (mae_lstm/mae_baseline))*100,2)} % \n")

    return list_of_mae_baseline_model, list_of_mae_recurrent_model

In [None]:
###NOT IMPORTANT NOW

fold_length = 24 * 7 * 16    # 24 hours * 7 days * 16 weeks (4 months)
fold_stride = 24 * 7 * 2     # 24 hours * 7 days * 2 weeks
train_test_ratio = 0.66

In [None]:
def get_folds(
    df: pd.DataFrame,
    fold_length: int,
    fold_stride: int) -> List[pd.DataFrame]:
    '''
    This function slides through the Time Series dataframe of shape (n_timesteps, n_features) to create folds
    - of equal `fold_length`
    - using `fold_stride` between each fold

    Returns a list of folds, each as a DataFrame
    '''
    # YOUR CODE HERE
    folds = []
    for idx in range(0, len(df)-fold_length, fold_stride): # --> also possible to get rid of the break
    #for idx in range(0, len(df), fold_stride):   # range(start, stop, step): for each idx in our rows at every 91 days
        # Exits the loop as soon as the last fold index would exceed the last index
        #if (idx + fold_length) > len(df):
            #break
        fold = df.iloc[idx:idx + fold_length, :]  # select from row idx til last row of the fold (3 years), all the columns
        folds.append(fold)   # append the 3 year fold to folds
    return folds