# First RNN

In [67]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple, Sequence

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [110]:
import pandas as pd
train_data = pd.read_csv("/Users/carlbechtolsheim/code/Niki827/watt_squad/raw_data/train.csv")
test_data = pd.read_csv("/Users/carlbechtolsheim/code/Niki827/watt_squad/raw_data/test.csv")

In [111]:
train_data.columns

Index(['time', 'pv_production', 'wind_production', 'consumption',
       'spot_market_price', 'precip_1h:mm', 'precip_type:idx',
       'prob_precip_1h:p', 'clear_sky_rad:W', 'clear_sky_energy_1h:J',
       'diffuse_rad:W', 'diffuse_rad_1h:Wh', 'direct_rad:W',
       'direct_rad_1h:Wh', 'global_rad:W', 'global_rad_1h:Wh',
       'sunshine_duration_1h:min', 'sun_azimuth:d', 'sun_elevation:d',
       'low_cloud_cover:p', 'medium_cloud_cover:p', 'high_cloud_cover:p',
       'total_cloud_cover:p', 'effective_cloud_cover:p', 'temp',
       'relative_humidity_2m:p', 'dew_point_2m:C', 'wind_speed_2m:ms',
       'wind_dir_2m:d', 't_10m:C', 'relative_humidity_10m:p',
       'dew_point_10m:C', 'wind_speed_10m:ms', 'wind_dir_10m:d', 't_50m:C',
       'relative_humidity_50m:p', 'dew_point_50m:C', 'wind_speed_50m:ms',
       'wind_dir_50m:d', 't_100m:C', 'relative_humidity_100m:p',
       'dew_point_100m:C', 'wind_speed_100m:ms', 'wind_dir_100m:d'],
      dtype='object')

## Data preprocessing

In [69]:
import pandas as pd
import numpy as np

from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import FunctionTransformer

## lists of features:


# f_time = ['time']


## our custom functions:

def log_transformed(data):
    """ replaces values in columns in a dataframe with the log values """
    f_logs = [
    'precip_1h:mm',
    'prob_precip_1h:p',
    'clear_sky_rad:W',
    'clear_sky_energy_1h:J',
    'diffuse_rad:W',
    'diffuse_rad_1h:Wh',
    'direct_rad:W',
    'direct_rad_1h:Wh',
    'global_rad:W',
    'global_rad_1h:Wh',
    'wind_speed_2m:ms',
    'wind_speed_10m:ms',
    'wind_speed_50m:ms',
    'wind_speed_100m:ms'
]
    for col in f_logs:
        data[col] = np.log(data[col] + 1e-5)
    return data

def time_transformed(data):
    """takes a df and splits the 'time' feature into three features: hour, month, season;
    drops the original time column"""

    feature = pd.to_datetime(data.time)

    hour = feature.dt.hour
    month  = feature.dt.month

    def assign_season(month):
        if month in [3, 4, 5]:
            return 1  # Spring
        elif month in [6, 7, 8]:
            return 2  # Summer
        elif month in [9, 10, 11]:
            return 3  # Fall
        else:  # December, January, February
            return 4  # Winter

    season = month.apply(assign_season)
    hour_sine = np.sin(2 * np.pi * hour / 24)
    hour_cosine = np.cos(2 * np.pi * hour / 24)
    month_sine = np.sin(2 * np.pi * month / 12)
    month_cosine = np.cos(2 * np.pi * month / 12)
    season_sine = np.sin(2 * np.pi * season / 4)
    season_cosine = np.cos(2 * np.pi * season / 4)

    data["hour_sine"] = hour_sine
    data["hour_cosine"] = hour_cosine
    data["month_sine"] = month_sine
    data["month_cosine"] = month_cosine
    data["season_sine"] = season_sine
    data["season_cosine"] = season_cosine

    data = data.drop(columns=["time"])

    return data

def degree_transformed(data):
    """ takes a df 'data' and takes the features with degree units (in the specific list f_degree);
    creates a sin and cos column for each to make them cyclical. drops the original columns"""

    f_degree = ['sun_azimuth:d', 'wind_dir_2m:d', 'wind_dir_10m:d', 'wind_dir_50m:d', 'wind_dir_100m:d']

    for col in f_degree:
        sin_column = np.sin(2 * np.pi * data[col]/360)
        cos_column = np.cos(2 * np.pi * data[col]/360)

        data[f"sin_{col}"] = sin_column
        data[f"cos_{col}"] = cos_column
        data = data.drop(columns=[col])

    return data

def transform_data(data):
    """ applies the above three functions to the input dataframe """
    data = degree_transformed(time_transformed(log_transformed(data)))

    all_col = list(data.columns)

    # defining the columns we don't want in our X_train
    drop_col = ['pv_production',
            'wind_production',
            'consumption',
            'spot_market_price',
            'precip_type:idx']

    f_ohe = ['precip_type:idx']

    scale_col = [col for col in all_col if col not in drop_col and f_ohe]

    # defining our scalers
    minmax = MinMaxScaler()
    ohe = OneHotEncoder(handle_unknown='ignore', sparse_output = False)


    # our preproc pipline
    preproc = make_column_transformer(
        (ohe, f_ohe),
        (minmax, scale_col),
        remainder = "drop"
    )

    data_transformed = preproc.fit_transform(data)
    data_transformed = pd.DataFrame(data_transformed, columns=preproc.get_feature_names_out())
    data_transformed['onehotencoder__precip_type:idx_2.0'] = 0

    print('➡️ preprocessing done')
    return data_transformed



# ## building the pipeline

# data = pd.read_csv("raw_data/train.csv")

# # calling our custom functions on our dataframe
# data_ft = degree_transformed(time_transformed(log_transformed(data)))

# all_col = list(data_ft.columns)

# # defining the columns we don't want in our X_train
# drop_col = ['pv_production',
#             'wind_production',
#             'consumption',
#             'spot_market_price',
#             'precip_type:idx']

# # defining the columns we want to scale
# scale_col = [col for col in all_col if col not in drop_col and f_ohe]

# # defining our scalers
# minmax = MinMaxScaler()
# ohe = OneHotEncoder(handle_unknown='ignore', sparse_output = False)

# # our preproc pipline
# preproc = make_column_transformer(
#     (ohe, f_ohe),
#     (minmax, scale_col),
#     remainder = "drop"
# )

# data_transformed = preproc.fit_transform(data_ft)
# data_transformed = pd.DataFrame(data_transformed, columns=preproc.get_feature_names_out())


In [70]:
# creating y_train and y_test
y_train = train_data['consumption'].copy()
y_test = test_data['consumption'].copy()

# creating X_train and X_test
X_train = train_data
X_train = X_train.drop(columns=['pv_production', 'wind_production', 'consumption', 'spot_market_price'])
X_test = test_data
X_test = X_test.drop(columns=['pv_production', 'wind_production', 'consumption', 'spot_market_price'])

# Preprocessing features
X_train_transformed = transform_data(X_train)
X_test_transformed = transform_data(X_test)

➡️ preprocessing done
➡️ preprocessing done


In [71]:
TARGET = 'consumption'

In [72]:
y_train.describe()

count    9515.000000
mean       20.262091
std         8.311275
min         0.048396
25%        14.757704
50%        18.817098
75%        23.716080
max        70.366622
Name: consumption, dtype: float64

In [73]:
df = X_train_transformed.copy()

In [74]:
df['consumption'] = y_train

In [9]:
df

Unnamed: 0,onehotencoder__precip_type:idx_0.0,onehotencoder__precip_type:idx_1.0,onehotencoder__precip_type:idx_2.0,onehotencoder__precip_type:idx_3.0,minmaxscaler__precip_1h:mm,minmaxscaler__prob_precip_1h:p,minmaxscaler__clear_sky_rad:W,minmaxscaler__clear_sky_energy_1h:J,minmaxscaler__diffuse_rad:W,minmaxscaler__diffuse_rad_1h:Wh,...,minmaxscaler__cos_sun_azimuth:d,minmaxscaler__sin_wind_dir_2m:d,minmaxscaler__cos_wind_dir_2m:d,minmaxscaler__sin_wind_dir_10m:d,minmaxscaler__cos_wind_dir_10m:d,minmaxscaler__sin_wind_dir_50m:d,minmaxscaler__cos_wind_dir_50m:d,minmaxscaler__sin_wind_dir_100m:d,minmaxscaler__cos_wind_dir_100m:d,consumption
0,1.0,0.0,0,0.0,0.0,0.0,0.757424,0.854846,0.781999,0.815432,...,0.038568,0.045382,0.291860,0.045382,0.291860,0.042169,0.299026,0.038731,0.307049,26.514689
1,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.779972,0.000000,0.742604,...,0.095955,0.023405,0.348815,0.023405,0.348815,0.023669,0.347984,0.024203,0.346324,28.326960
2,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.173244,0.018654,0.364700,0.018654,0.364700,0.019129,0.363021,0.019853,0.360507,23.682207
3,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.265584,0.017037,0.370590,0.017037,0.370590,0.017952,0.367222,0.018891,0.363862,25.354782
4,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.369241,0.031970,0.324079,0.031970,0.324079,0.031664,0.324897,0.031056,0.326534,23.861942
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9510,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.600413,0.142764,0.150168,0.142764,0.150168,0.110331,0.186698,0.074594,0.237266,44.422658
9511,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.720968,0.183310,0.113080,0.183310,0.113080,0.147064,0.145830,0.107072,0.190797,45.167707
9512,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.836329,0.187379,0.109785,0.187379,0.109785,0.149545,0.143375,0.107612,0.190112,32.476198
9513,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.933029,0.204697,0.096520,0.204697,0.096520,0.160279,0.133135,0.110878,0.186020,28.561791


In [10]:
X_train_transformed

Unnamed: 0,onehotencoder__precip_type:idx_0.0,onehotencoder__precip_type:idx_1.0,onehotencoder__precip_type:idx_2.0,onehotencoder__precip_type:idx_3.0,minmaxscaler__precip_1h:mm,minmaxscaler__prob_precip_1h:p,minmaxscaler__clear_sky_rad:W,minmaxscaler__clear_sky_energy_1h:J,minmaxscaler__diffuse_rad:W,minmaxscaler__diffuse_rad_1h:Wh,...,minmaxscaler__sin_sun_azimuth:d,minmaxscaler__cos_sun_azimuth:d,minmaxscaler__sin_wind_dir_2m:d,minmaxscaler__cos_wind_dir_2m:d,minmaxscaler__sin_wind_dir_10m:d,minmaxscaler__cos_wind_dir_10m:d,minmaxscaler__sin_wind_dir_50m:d,minmaxscaler__cos_wind_dir_50m:d,minmaxscaler__sin_wind_dir_100m:d,minmaxscaler__cos_wind_dir_100m:d
0,1.0,0.0,0,0.0,0.0,0.0,0.757424,0.854846,0.781999,0.815432,...,0.307401,0.038568,0.045382,0.291860,0.045382,0.291860,0.042169,0.299026,0.038731,0.307049
1,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.779972,0.000000,0.742604,...,0.205451,0.095955,0.023405,0.348815,0.023405,0.348815,0.023669,0.347984,0.024203,0.346324
2,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.121531,0.173244,0.018654,0.364700,0.018654,0.364700,0.019129,0.363021,0.019853,0.360507
3,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.058350,0.265584,0.017037,0.370590,0.017037,0.370590,0.017952,0.367222,0.018891,0.363862
4,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.017398,0.369241,0.031970,0.324079,0.031970,0.324079,0.031664,0.324897,0.031056,0.326534
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9510,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.010188,0.600413,0.142764,0.150168,0.142764,0.150168,0.110331,0.186698,0.074594,0.237266
9511,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.051479,0.720968,0.183310,0.113080,0.183310,0.113080,0.147064,0.145830,0.107072,0.190797
9512,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.130026,0.836329,0.187379,0.109785,0.187379,0.109785,0.149545,0.143375,0.107612,0.190112
9513,1.0,0.0,0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.250030,0.933029,0.204697,0.096520,0.204697,0.096520,0.160279,0.133135,0.110878,0.186020


### Wind feature importance

In [40]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

In [41]:
XG_X_train_transformed, XG_X_val, XG_y_train, XG_y_val = train_test_split(
X_train_transformed, y_train, test_size = 0.1, random_state = 42  # val = 10%
)

In [42]:
xgb_reg = XGBRegressor(
    max_depth=7,                # Optimal value found
    n_estimators=300,           # Optimal value found
    learning_rate=0.05,         # Optimal value found
    reg_alpha=0.05,             # Optimal value found
    reg_lambda=20,              # Optimal value found
    subsample=0.8,              # Optimal value found
    colsample_bytree=0.8,       # Optimal value found
    objective='reg:squarederror',
    eval_metric="mae",
    random_state=42             # Ensuring reproducibility
)

# Fit the model on the training data
xgb_reg.fit(
    XG_X_train_transformed,
    XG_y_train,
    eval_set=[(XG_X_train_transformed, XG_y_train), (XG_X_val, XG_y_val)],
    verbose=True,
    early_stopping_rounds=5     # Retain early stopping
)

print("➡️  model fitting done")

# Make predictions
y_pred = xgb_reg.predict(X_test_transformed)

print("➡️  performed predictions")

[0]	validation_0-mae:18.74866	validation_1-mae:19.21858
[1]	validation_0-mae:17.83222	validation_1-mae:18.31128
[2]	validation_0-mae:16.95416	validation_1-mae:17.43640
[3]	validation_0-mae:16.12614	validation_1-mae:16.60806
[4]	validation_0-mae:15.33915	validation_1-mae:15.81800
[5]	validation_0-mae:14.59095	validation_1-mae:15.07884
[6]	validation_0-mae:13.88384	validation_1-mae:14.37193
[7]	validation_0-mae:13.20659	validation_1-mae:13.69707




[8]	validation_0-mae:12.56160	validation_1-mae:13.05923
[9]	validation_0-mae:11.95466	validation_1-mae:12.45021
[10]	validation_0-mae:11.37652	validation_1-mae:11.87426
[11]	validation_0-mae:10.83037	validation_1-mae:11.32417
[12]	validation_0-mae:10.31296	validation_1-mae:10.81352
[13]	validation_0-mae:9.82377	validation_1-mae:10.32227
[14]	validation_0-mae:9.35623	validation_1-mae:9.85483
[15]	validation_0-mae:8.91674	validation_1-mae:9.41439
[16]	validation_0-mae:8.50189	validation_1-mae:9.00044
[17]	validation_0-mae:8.11077	validation_1-mae:8.60586
[18]	validation_0-mae:7.74036	validation_1-mae:8.24125
[19]	validation_0-mae:7.39620	validation_1-mae:7.89545
[20]	validation_0-mae:7.06795	validation_1-mae:7.56703
[21]	validation_0-mae:6.76201	validation_1-mae:7.26454
[22]	validation_0-mae:6.47747	validation_1-mae:6.97817
[23]	validation_0-mae:6.20549	validation_1-mae:6.71336
[24]	validation_0-mae:5.95416	validation_1-mae:6.47164
[25]	validation_0-mae:5.71907	validation_1-mae:6.23458
[

In [43]:
importance = xgb_reg.feature_importances_
feature_names = X_train_transformed.columns
importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': importance})
importance_df.sort_values(by='Importance', ascending=False, inplace=True)

In [44]:
importance_df

Unnamed: 0,Feature,Importance
25,minmaxscaler__t_10m:C,0.145035
37,minmaxscaler__hour_sine,0.111843
33,minmaxscaler__t_100m:C,0.094134
23,minmaxscaler__dew_point_2m:C,0.090748
40,minmaxscaler__month_cosine,0.068513
27,minmaxscaler__dew_point_10m:C,0.065991
38,minmaxscaler__hour_cosine,0.034842
43,minmaxscaler__sin_sun_azimuth:d,0.03251
42,minmaxscaler__season_cosine,0.031608
21,minmaxscaler__temp,0.024068


In [45]:
wind_columns_drop = list(importance_df[importance_df['Importance']<0.01]['Feature'])

In [46]:
wind_columns_drop

['minmaxscaler__relative_humidity_50m:p',
 'minmaxscaler__cos_wind_dir_10m:d',
 'minmaxscaler__relative_humidity_10m:p',
 'minmaxscaler__cos_wind_dir_100m:d',
 'minmaxscaler__wind_speed_10m:ms',
 'minmaxscaler__global_rad_1h:Wh',
 'minmaxscaler__wind_speed_50m:ms',
 'minmaxscaler__clear_sky_energy_1h:J',
 'minmaxscaler__diffuse_rad_1h:Wh',
 'minmaxscaler__direct_rad_1h:Wh',
 'minmaxscaler__sin_wind_dir_10m:d',
 'minmaxscaler__cos_wind_dir_50m:d',
 'minmaxscaler__effective_cloud_cover:p',
 'minmaxscaler__cos_wind_dir_2m:d',
 'minmaxscaler__sin_wind_dir_100m:d',
 'minmaxscaler__wind_speed_100m:ms',
 'minmaxscaler__relative_humidity_2m:p',
 'minmaxscaler__sunshine_duration_1h:min',
 'minmaxscaler__total_cloud_cover:p',
 'minmaxscaler__sun_elevation:d',
 'minmaxscaler__high_cloud_cover:p',
 'minmaxscaler__low_cloud_cover:p',
 'minmaxscaler__sin_wind_dir_50m:d',
 'minmaxscaler__sin_wind_dir_2m:d',
 'minmaxscaler__direct_rad:W',
 'minmaxscaler__wind_speed_2m:ms',
 'minmaxscaler__global_rad:W

In [47]:
df = df.drop(columns = wind_columns_drop).copy()

In [48]:
df

Unnamed: 0,minmaxscaler__temp,minmaxscaler__dew_point_2m:C,minmaxscaler__t_10m:C,minmaxscaler__dew_point_10m:C,minmaxscaler__t_50m:C,minmaxscaler__dew_point_50m:C,minmaxscaler__t_100m:C,minmaxscaler__relative_humidity_100m:p,minmaxscaler__dew_point_100m:C,minmaxscaler__hour_sine,minmaxscaler__hour_cosine,minmaxscaler__month_sine,minmaxscaler__month_cosine,minmaxscaler__season_sine,minmaxscaler__season_cosine,minmaxscaler__sin_sun_azimuth:d,minmaxscaler__cos_sun_azimuth:d,consumption
0,0.534066,0.541787,0.527716,0.533724,0.521348,0.528529,0.518100,0.489060,0.531722,0.370590,0.017037,0.75,0.933013,0.5,1.0,0.307401,0.038568,26.514689
1,0.529670,0.547550,0.525499,0.539589,0.521348,0.534535,0.520362,0.494208,0.537764,0.250000,0.066987,0.75,0.933013,0.5,1.0,0.205451,0.095955,28.326960
2,0.534066,0.541787,0.529933,0.536657,0.523596,0.528529,0.520362,0.480051,0.531722,0.146447,0.146447,0.75,0.933013,0.5,1.0,0.121531,0.173244,23.682207
3,0.534066,0.559078,0.527716,0.554252,0.521348,0.549550,0.518100,0.528958,0.552870,0.066987,0.250000,0.75,0.933013,0.5,1.0,0.058350,0.265584,25.354782
4,0.505495,0.610951,0.501109,0.607038,0.494382,0.603604,0.490950,0.715573,0.607251,0.017037,0.370590,0.75,0.933013,0.5,1.0,0.017398,0.369241,23.861942
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9510,0.268132,0.371758,0.263858,0.366569,0.260674,0.363363,0.260181,0.891892,0.371601,0.017037,0.629410,0.75,0.933013,0.5,1.0,0.010188,0.600413,44.422658
9511,0.257143,0.365994,0.252772,0.360704,0.253933,0.360360,0.255656,0.898327,0.368580,0.066987,0.750000,0.75,0.933013,0.5,1.0,0.051479,0.720968,45.167707
9512,0.252747,0.357349,0.248337,0.351906,0.251685,0.351351,0.255656,0.879022,0.362538,0.146447,0.853553,0.75,0.933013,0.5,1.0,0.130026,0.836329,32.476198
9513,0.246154,0.345821,0.243902,0.340176,0.249438,0.345345,0.253394,0.864865,0.353474,0.250000,0.933013,0.75,0.933013,0.5,1.0,0.250030,0.933029,28.561791


## Prepare the dataset

### Folds

In [None]:
# --------------------------------------------------- #
# Let's consider FOLDS with a length of 4 months      #
# (3 years will be used for train, 1 for test!)       #
# --------------------------------------------------- #

FOLD_LENGTH = 24 * 7 * 16                             # 24 times a day * for 7 days * for 16 weeks


# --------------------------------------------------- #
# Let's consider FOLDS starting every 2 weeks         #
# --------------------------------------------------- #

FOLD_STRIDE = 24 * 7 * 2                              # 24 times a day for 2 weeks


# --------------------------------------------------- #
# Let's consider a train-test-split ratio of 2/3      #
# --------------------------------------------------- #

TRAIN_VAL_RATIO = 0.75                               # 3 month train, 1 month test

In [None]:
def get_folds(
    df: pd.DataFrame,
    fold_length: int,
    fold_stride: int) -> List[pd.DataFrame]:
    '''
    This function slides through the Time Series dataframe of shape (n_timesteps, n_features) to create folds
    - of equal `fold_length`
    - using `fold_stride` between each fold

    Returns a list of folds, each as a DataFrame
    '''
    # YOUR CODE HERE
    folds = []
    for idx in range(0, len(df)-fold_length, fold_stride): # --> also possible to get rid of the break
    #for idx in range(0, len(df), fold_stride):   # range(start, stop, step): for each idx in our rows at every 91 days
        # Exits the loop as soon as the last fold index would exceed the last index
        #if (idx + fold_length) > len(df):
            #break
        fold = df.iloc[idx:idx + fold_length, :]  # select from row idx til last row of the fold (3 years), all the columns
        folds.append(fold)   # append the 3 year fold to folds
    return folds

### Train-Val split

In [None]:
def train_val_split(fold:pd.DataFrame,
                     train_val_ratio: float,
                     input_length: int) -> Tuple[pd.DataFrame]:
    '''
    Returns a train dataframe and a test dataframe (fold_train, fold_test)
    from which one can sample (X,y) sequences.
    df_train should contain all the timesteps until round(train_test_ratio * len(fold))
    '''
    # YOUR CODE HERE
    # TRAIN SET
    # ======================
    last_train_idx = round(train_val_ratio * len(fold))  # 0.66 * number of rows in the fold (66% of the fold for train)
    fold_train = fold.iloc[0:last_train_idx, :]   # 1st until last row of train set, all columns

    # TEST SET
    # ======================
    first_val_idx = last_train_idx - input_length  # last row of train set - 2 weeks --> test set starts 2 weeks
                                                                    # before train set ends --> overlap (not a problem with X)
    fold_val = fold.iloc[first_val_idx:, :]   # 1st until last row of val set, all columns

    return (fold_train, fold_val)

In [None]:
(fold_train, fold_val) = train_val_split(folds[0], TRAIN_VAL_RATIO, INPUT_LENGTH)

In [None]:
#Five days as input & output length
INPUT_LENGTH = 24 * 5 # records every hour x 24 hours
                      # for 5 days
OUTPUT_LENGTH = 12

## Model

### Prepare dataset for model training

#### Train-Val split

In [75]:
def train_val_split(df:pd.DataFrame,
                     train_val_ratio: float,
                     input_length: int) -> Tuple[pd.DataFrame]:
    '''
    Returns a train dataframe and a test dataframe (fold_train, fold_test)
    from which one can sample (X,y) sequences.
    df_train should contain all the timesteps until round(train_test_ratio * len(fold))
    '''
    # YOUR CODE HERE
    # TRAIN SET
    # ======================
    last_train_idx = round(train_val_ratio * len(df))  # 0.66 * number of rows in the fold (66% of the fold for train)
    fold_train = df.iloc[0:last_train_idx, :]   # 1st until last row of train set, all columns

    # TEST SET
    # ======================
    first_val_idx = last_train_idx - input_length  # last row of train set - 2 weeks --> test set starts 2 weeks
                                                                    # before train set ends --> overlap (not a problem with X)
    fold_val = df.iloc[first_val_idx:, :]   # 1st until last row of val set, all columns

    return (fold_train, fold_val)

#### Sequences

In [76]:
def get_Xi_yi(
    df:pd.DataFrame,
    input_length:int,  # 120
    output_length:int):  # 120
    '''
    - given a fold, it returns one sequence (X_i, y_i)
    - with the starting point of the sequence being chosen at random
    '''
    # YOUR CODE
    first_possible_start = 0                                    # the +1 accounts for the index, that is exclusive.
    last_possible_start = len(df) - (input_length + output_length) + 1    # It can start as long as there are still
                                                                             # 120 + 1 days after the 1st day.
    random_start = np.random.randint(first_possible_start, last_possible_start)  # np.random to pick a day inside
                                                                                    # the possible interval.
    X_i = df.iloc[random_start:random_start+input_length]

    y_i = df.iloc[random_start+input_length:
                  random_start+input_length+output_length][TARGET]  # creates a pd.DataFrame for the target y

    return (X_i, y_i)

In [77]:
def get_X_y(
    df:pd.DataFrame,
    number_of_sequences:int,
    input_length:int,
    output_length:int
):
    # YOUR CODE HERE
    X, y = [], []  # lists for the sequences for X and y

    for i in range(number_of_sequences):
        (Xi, yi) = get_Xi_yi(df, input_length, output_length)   # calls the previous function to generate sequences X + y
        X.append(Xi)
        y.append(yi)

    return np.array(X), np.array(y)

In [78]:
#Five days as input & output length
INPUT_LENGTH = 24 * 5 * 2 # records every hour x 24 hours
                      # for 5 days
OUTPUT_LENGTH = 24

NUMBER_OF_SEQUENCES_TRAIN = int(len(df) * 0.9)
NUMBER_OF_SEQUENCES_VAL = int(len(df) * 0.1)

In [79]:
TRAIN_VAL_RATIO = 0.9
df_train, df_val = train_val_split(df, TRAIN_VAL_RATIO, INPUT_LENGTH)



In [80]:
X_train, y_train = get_X_y(df_train, NUMBER_OF_SEQUENCES_TRAIN, INPUT_LENGTH, OUTPUT_LENGTH)
X_val, y_val = get_X_y(df_val, NUMBER_OF_SEQUENCES_VAL, INPUT_LENGTH, OUTPUT_LENGTH)


#Dropping the targets from the X (we dont want to train the model on the targets)
X_train = X_train[:, :, :-1]
X_val = X_val[:, :, :-1]

##### Show data

In [81]:
#Training set (splits the whole dataset into training and val set)
df_train.shape

(8564, 54)

In [82]:
#Validation set
df_val.shape

(1191, 54)

In [83]:
#Sequences of features of df_train (Splits the training set into smaller sequences with each shape of (120, 53))
# Train model on these features -> 53 features with 120 past hours
X_train.shape

(8563, 240, 53)

In [84]:
y_train = np.expand_dims(y_train, axis=-1)

In [85]:
#Sequences of corresponding targets of df_train (Splits the training set into smaller sequences with each shape of (120, 3))
# Train model on these targets -> 3 targets with 12 next hours
y_train.shape

(8563, 24, 1)

In [86]:
#Sequences of features of df_val (Splits the val set into 750 sequences with each shape of (120, 53))
X_val.shape

(951, 240, 53)

In [87]:
y_val = np.expand_dims(y_val, axis=-1)

In [88]:
#Sequences of corresponding targets of df_val (Splits the val set into 750 sequences with each shape of (120, 3))
y_val.shape

(951, 24, 1)

In [89]:
X_train.shape[1:]

(240, 53)

##### How do the dfs look like?

In [None]:
df_train

In [None]:
df_val

In [None]:
X_train

In [None]:
y_train

In [None]:
X_val

In [None]:
y_val

### Train model on mini sequences

In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers, metrics
from tensorflow.keras.regularizers import L1L2

def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layer
    model.add(layers.LSTM(64,
                          activation='tanh',
                          return_sequences = True,
                          input_shape=(120, 53)

                         ))

    model.add(layers.LSTM(units=32, activation='tanh'))

    ## 1.2 Hidden layer
    model.add(layers.Dense(10, activation="linear"))

    ## 1.2 - Predictive Dense Layers
    output_length = y_train.shape[1]
    model.add(layers.Dense(output_length*3, activation='linear'))  # Flatten the output
    model.add(layers.Reshape((output_length, 3)))  # Reshape to (12, 3)


    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse', optimizer=adam, metrics=["mae"])

    return model

In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers, metrics
from tensorflow.keras.regularizers import L1L2

def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layers
    model.add(layers.LSTM(64,
                          activation='tanh',
                          return_sequences=True,  # Keep output for each time step
                          input_shape=(120, 53)))

    model.add(layers.LSTM(32,
                          activation='tanh',
                          return_sequences=True))  # Keep the sequence dimension

    ## 1.2 - Hidden Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(64, activation="relu")))  # Apply to each time step

    ## 1.3 - Predictive Dense Layer
    output_length = y_train.shape[1]  # 12 future steps
    num_targets = y_train.shape[2]    # 3 targets
    model.add(layers.TimeDistributed(layers.Dense(num_targets, activation='linear')))  # Directly output 3 targets

    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse', optimizer=adam, metrics=["mae"])

    return model


In [None]:
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras import regularizers


def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layers
    model.add(layers.LSTM(32,
                          activation='tanh',
                          return_sequences=True,
                          input_shape=(120, 53),
                          kernel_regularizer=regularizers.l2(0.01)))

    model.add(layers.Dropout(0.3))

    model.add(layers.LSTM(16, activation='tanh', return_sequences=True))

    ## 1.2 - Slice the output to focus only on the last 12 time steps
    model.add(layers.Lambda(lambda x: x[:, -12:, :]))  # Keep only the last 12 time steps

    ## 1.3 - Hidden Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(64, activation="relu")))

    ## 1.4 - Predictive Dense Layer
    num_targets = y_train.shape[2]    # 3 targets
    model.add(layers.TimeDistributed(layers.Dense(num_targets, activation='linear')))

    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=[
                      MeanAbsoluteError(name="mae_target_1"),
                      MeanAbsoluteError(name="mae_target_2"),
                      MeanAbsoluteError(name="mae_target_3")
                  ])


    return model


In [None]:
model = init_model(X_train, y_train)
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from typing import Tuple

def fit_model(model: tf.keras.Model, verbose=1) -> Tuple[tf.keras.Model, dict]:

    es = EarlyStopping(monitor = "val_loss",
                      patience = 15,
                      mode = "min",
                      restore_best_weights = True)


    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val),
                        shuffle = False,
                        batch_size = 32,
                        epochs = 500,
                        callbacks = [es],
                        verbose = verbose)

    return model, history

In [None]:
model = init_model(X_train, y_train)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

In [None]:
model = init_model(X_train, y_train)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

#### One model for each feature

##### PV

In [None]:
y_train.shape

In [None]:
y_train[:, :, 0].shape

In [None]:
y_train_pv = y_train[:, :, 0]

In [None]:
y_train_pv = np.expand_dims(y_train_pv, axis=-1)

In [None]:
y_train_pv.shape

In [None]:
y_val_pv = y_val[:,:,0]

In [None]:
y_val_pv = np.expand_dims(y_val_pv, axis=-1)

In [None]:
y_val_pv.shape

In [None]:
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras import regularizers


def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layers
    model.add(layers.LSTM(32,
                          activation='tanh',
                          return_sequences=True,
                          input_shape=(120, 53),
                          kernel_regularizer=regularizers.l2(0.01)))

    model.add(layers.Dropout(0.3))

    model.add(layers.LSTM(16, activation='tanh', return_sequences=True))

    ## 1.2 - Slice the output to focus only on the last 12 time steps
    model.add(layers.Lambda(lambda x: x[:, -12:, :]))  # Keep only the last 12 time steps

    ## 1.3 - Hidden Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(64, activation="relu")))

    ## 1.4 - Predictive Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(1, activation='linear')))

    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=['mae'
                  ])


    return model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from typing import Tuple

def fit_model(model: tf.keras.Model, verbose=1) -> Tuple[tf.keras.Model, dict]:

    es = EarlyStopping(monitor = "val_loss",
                      patience = 15,
                      mode = "min",
                      restore_best_weights = True)


    history = model.fit(X_train, y_train_pv,
                        validation_data=(X_val, y_val_pv),
                        shuffle = False,
                        batch_size = 32,
                        epochs = 500,
                        callbacks = [es],
                        verbose = verbose)

    return model, history

In [None]:
model = init_model(X_train, y_train_pv)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

## Consumption

In [None]:
y_train.shape

In [90]:
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras import regularizers
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers, metrics
from tensorflow.keras.regularizers import L1L2

input_shape = X_train.shape[1:]
def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layers
    model.add(layers.LSTM(32,
                          activation='tanh',
                          return_sequences=True,
                          input_shape=input_shape,
                          kernel_regularizer=regularizers.l2(0.02)))

    model.add(layers.Dropout(0.3))

    model.add(layers.LSTM(16, activation='tanh', return_sequences=True))


    ## 1.2 - Slice the output to focus only on the last 12 time steps
    model.add(layers.Lambda(lambda x: x[:, -24:, :]))  # Keep only the last 12 time steps

    ## 1.3 - Hidden Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(32, activation="relu")))

    ## 1.4 - Predictive Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(1, activation='linear')))

    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=['mae'
                  ])


    return model

In [91]:
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from typing import Tuple

def fit_model(model: tf.keras.Model, verbose=1) -> Tuple[tf.keras.Model, dict]:

    es = EarlyStopping(monitor = "val_loss",
                      patience = 15,
                      mode = "min",
                      restore_best_weights = True)


    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val),
                        shuffle = False,
                        batch_size = 32,
                        epochs = 500,
                        callbacks = [es],
                        verbose = verbose)

    return model, history

In [92]:
model = init_model(X_train, y_train)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 240, 32)           11008     
                                                                 
 dropout_2 (Dropout)         (None, 240, 32)           0         
                                                                 
 lstm_5 (LSTM)               (None, 240, 16)           3136      
                                                                 
 lambda_2 (Lambda)           (None, 24, 16)            0         
                                                                 
 time_distributed_4 (TimeDis  (None, 24, 32)           544       
 tributed)                                                       
                                                                 
 time_distributed_5 (TimeDis  (None, 24, 1)            33        
 tributed)                                            

In [33]:
model = init_model(X_train, y_train)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

2024-12-02 15:58:23.281695: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 240, 32)           11008     
                                                                 
 dropout (Dropout)           (None, 240, 32)           0         
                                                                 
 lstm_1 (LSTM)               (None, 240, 16)           3136      
                                                                 
 lambda (Lambda)             (None, 24, 16)            0         
                                                                 
 time_distributed (TimeDistr  (None, 24, 32)           544       
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 24, 1)            33        
 tributed)                                              

In [98]:
x_temp = X_val[0]

In [103]:
x_temp = np.expand_dims(x_temp, axis=0)

In [104]:
x_temp.shape

(1, 240, 53)

In [108]:
pred = model.predict(x_temp)



In [109]:
pred[0]

array([[26.542133],
       [24.893875],
       [24.1705  ],
       [23.999874],
       [23.873137],
       [24.307968],
       [25.709177],
       [27.953548],
       [33.35124 ],
       [36.658524],
       [35.119118],
       [30.864096],
       [26.67113 ],
       [23.850672],
       [22.142677],
       [21.160099],
       [20.61352 ],
       [20.344252],
       [20.35854 ],
       [20.779062],
       [21.855276],
       [23.639765],
       [25.492186],
       [26.796862]], dtype=float32)

In [None]:
y_train.shape

In [None]:
y_test[:12]

In [None]:
test_data

##### Consumption

In [None]:
y_train_cons = y_train[:, :, 1]
y_train_cons = np.expand_dims(y_train_cons, axis=-1)
y_train_cons.shape

In [None]:
y_val_cons = y_val[:,:,1]
y_val_cons = np.expand_dims(y_val_cons, axis=-1)
y_val_cons.shape

In [None]:
def init_model(X_train, y_train):

    # 1 - RNN architecture
    # ======================
    model = models.Sequential()

    ## 1.1 - Recurrent Layers
    model.add(layers.LSTM(32,
                          activation='tanh',
                          return_sequences=True,
                          input_shape=(120, 53),
                          kernel_regularizer=regularizers.l2(0.01)))

    model.add(layers.Dropout(0.3))

    model.add(layers.LSTM(16, activation='tanh', return_sequences=True))

    ## 1.2 - Slice the output to focus only on the last 12 time steps
    model.add(layers.Lambda(lambda x: x[:, -12:, :]))  # Keep only the last 12 time steps

    ## 1.3 - Hidden Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(64, activation="relu")))

    ## 1.4 - Predictive Dense Layer
    model.add(layers.TimeDistributed(layers.Dense(1, activation='linear')))

    # 2 - Compiler
    # ======================
    adam = optimizers.Adam(learning_rate=0.005)
    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=['mae'
                  ])


    return model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from typing import Tuple

def fit_model(model: tf.keras.Model, verbose=1) -> Tuple[tf.keras.Model, dict]:

    es = EarlyStopping(monitor = "val_loss",
                      patience = 30,
                      mode = "min",
                      restore_best_weights = True)


    history = model.fit(X_train, y_train_cons,
                        validation_data=(X_val, y_val_cons),
                        shuffle = False,
                        batch_size = 32,
                        epochs = 500,
                        callbacks = [es],
                        verbose = verbose)

    return model, history

In [None]:
model = init_model(X_train, y_train_cons)
model.summary()

# 2 - Training
# ====================================
model, history = fit_model(model)

In [None]:
X_train_transformed.columns

## Cross Validation

### Sequences

In [None]:
def get_Xi_yi(
    df:pd.DataFrame,
    input_length:int,  # 120
    output_length:int):  # 120
    '''
    - given a fold, it returns one sequence (X_i, y_i)
    - with the starting point of the sequence being chosen at random
    '''
    # YOUR CODE
    first_possible_start = 0                                    # the +1 accounts for the index, that is exclusive.
    last_possible_start = len(df) - (input_length + output_length) + 1    # It can start as long as there are still
                                                                             # 120 + 1 days after the 1st day.
    random_start = np.random.randint(first_possible_start, last_possible_start)  # np.random to pick a day inside
                                                                                    # the possible interval.
    X_i = df.iloc[random_start:random_start+input_length]

    y_i = df.iloc[random_start+input_length:
                  random_start+input_length+output_length][TARGET]  # creates a pd.DataFrame for the target y

    return (X_i, y_i)

In [None]:
#Testing if the function works
Xi, Yi = get_Xi_yi(
    fold=folds[0],
    input_length=INPUT_LENGTH,
    output_length=OUTPUT_LENGTH)

In [None]:
Xi.shape

In [None]:
### In the recap they choose a number which is bigger than the number of rows in the fold -> does that mean there are duplications???!!!
NUMBER_OF_SEQUENCES_TRAIN = (FOLD_LENGTH - (INPUT_LENGTH + OUTPUT_LENGTH))
NUMBER_OF_SEQUENCES_VAL = (FOLD_LENGTH - (INPUT_LENGTH + OUTPUT_LENGTH))

In [None]:
NUMBER_OF_SEQUENCES_TRAIN = len(df) - (INPUT_LENGTH + OUTPUT_LENGTH)

In [None]:
NUMBER_OF_SEQUENCES_TRAIN

In [None]:
def get_X_y(
    df:pd.DataFrame,
    number_of_sequences:int,
    input_length:int,
    output_length:int
):
    # YOUR CODE HERE
    X, y = [], []  # lists for the sequences for X and y

    for i in range(number_of_sequences):
        (Xi, yi) = get_Xi_yi(df, input_length, output_length)   # calls the previous function to generate sequences X + y
        X.append(Xi)
        y.append(yi)

    return np.array(X), np.array(y)

In [None]:
seq = get_X_y(df, NUMBER_OF_SEQUENCES_TRAIN, INPUT_LENGTH, OUTPUT_LENGTH)

In [None]:
seq[1].shape

In [None]:
X_train.shape

In [None]:
y_train.shape

In [None]:
y_temp = y_train[:,:,0]

In [None]:
y_temp = np.expand_dims(y_temp, axis=-1)

In [None]:
y_temp.shape

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

def cross_validate_baseline_and_lstm():
    '''
    This function cross-validates
    - the "last seen value" baseline model
    - the RNN model
    '''

    list_of_mae_baseline_model = []
    list_of_mae_recurrent_model = []

    # 0 - Creating folds
    # =========================================
    folds = get_folds(df, FOLD_LENGTH, FOLD_STRIDE)  # function we coded to get the folds

    for fold_id, fold in enumerate(folds):

        # 1 - Train/val split the current fold
        # =========================================
        (fold_train, fold_val) = train_val_split(fold, TRAIN_VAL_RATIO, INPUT_LENGTH) # function we coded to split train/val

        X_train, y_train = get_X_y(fold_train, NUMBER_OF_SEQUENCES_TRAIN, INPUT_LENGTH, OUTPUT_LENGTH)  # function we coded to get multiple
        X_val, y_val = get_X_y(fold_val, NUMBER_OF_SEQUENCES_TRAIN, INPUT_LENGTH, OUTPUT_LENGTH)       # sequences from a fold

        # 2 - Modelling
        # =========================================

        ##### Baseline Model
        baseline_model = init_baseline()
        mae_baseline = baseline_model.evaluate(X_val, y_val, verbose=0)[1]   # evaluating baseline model (metric)
        list_of_mae_baseline_model.append(mae_baseline)
        print("-"*50)
        print(f"MAE baseline fold n°{fold_id} = {round(mae_baseline, 2)}")

        ##### LSTM Model
        model = init_model(X_train, y_train)
        es = EarlyStopping(monitor = "val_mae",
                           mode = "min",
                           patience = 3,
                           restore_best_weights = True)

        history = model.fit(X_train, y_train,
                            validation_split = 0.3,
                            shuffle = False,
                            batch_size = 32,
                            epochs = 50,
                            callbacks = [es],
                            verbose = 0)
        res = model.evaluate(X_val, y_val, verbose=0)    # evaluating LSTM (metric)
        mae_lstm = res[1]
        list_of_mae_recurrent_model.append(mae_lstm)
        print(f"MAE LSTM fold n°{fold_id} = {round(mae_lstm, 2)}")

        ##### Comparison LSTM vs Baseline for the current fold
        print(f"🏋🏽‍♂️ improvement over baseline: {round((1 - (mae_lstm/mae_baseline))*100,2)} % \n")

    return list_of_mae_baseline_model, list_of_mae_recurrent_model

In [None]:
###NOT IMPORTANT NOW

fold_length = 24 * 7 * 16    # 24 hours * 7 days * 16 weeks (4 months)
fold_stride = 24 * 7 * 2     # 24 hours * 7 days * 2 weeks
train_test_ratio = 0.66

In [None]:
def get_folds(
    df: pd.DataFrame,
    fold_length: int,
    fold_stride: int) -> List[pd.DataFrame]:
    '''
    This function slides through the Time Series dataframe of shape (n_timesteps, n_features) to create folds
    - of equal `fold_length`
    - using `fold_stride` between each fold

    Returns a list of folds, each as a DataFrame
    '''
    # YOUR CODE HERE
    folds = []
    for idx in range(0, len(df)-fold_length, fold_stride): # --> also possible to get rid of the break
    #for idx in range(0, len(df), fold_stride):   # range(start, stop, step): for each idx in our rows at every 91 days
        # Exits the loop as soon as the last fold index would exceed the last index
        #if (idx + fold_length) > len(df):
            #break
        fold = df.iloc[idx:idx + fold_length, :]  # select from row idx til last row of the fold (3 years), all the columns
        folds.append(fold)   # append the 3 year fold to folds
    return folds