In [18]:
import pandas as pd
from data import citylearn_challenge_2022_phase_1 as competition_data
import os.path as osp
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import grid_search_forecaster
from matplotlib import pyplot as plt
from skforecast.model_selection import backtesting_forecaster

consumptions_path = osp.join(osp.dirname(competition_data.__file__), "consumptions/building_consumptions.csv")
consumptions = pd.read_csv(consumptions_path)[[f"{i}" for i in range(5)]]
# consumptions

In [19]:
consumption_building_1 = consumptions["0"].to_frame().rename(columns={"0":"Consumption"})
consumption_building_2 = consumptions["1"].to_frame().rename(columns={"1":"Consumption"})
consumption_building_3 = consumptions["2"].to_frame().rename(columns={"2":"Consumption"})
consumption_building_4 = consumptions["3"].to_frame().rename(columns={"3":"Consumption"})
consumption_building_5 = consumptions["4"].to_frame().rename(columns={"4":"Consumption"})

In [20]:
data = consumption_building_1

In [21]:
# Lags used as predictors
load_lags_grid = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60],
                  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
                  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57],
                  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37],
                  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
                  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41],
                  [1],
                  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
                  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
                  [1, 23, 45, 67],
                  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
                  [1, 23, 45],
                  [1, 2, 3, 4, 5],
                  [1, 2, 3, 4, 5, 6],
                  [1, 2, 3, 4, 23, 24, 25, 26, 45, 46, 47, 48, 67, 68, 69, 70]]


In [22]:
solar_lags_grid = [
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60],
[1, 2, 23, 24, 45, 46],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
[1],
[1, 2, 3, 4, 5, 6, 23, 24, 25, 26, 27, 28],
[1, 2],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63],
[1, 2, 23, 24, 45, 46, 67, 68],
[1, 2, 23, 24, 45, 46],
]

In [32]:
def run_grid_search(ys, lags_grid):
    all_results = pd.DataFrame()
    for y in ys:

        forecaster = ForecasterAutoreg(
                     regressor     = Ridge(random_state=123),
                     lags          = 24, # This value will be replaced in the grid search
                     transformer_y = StandardScaler()
                 )

        forecaster.fit(y=y)

        # Regressor's hyperparameters
        param_grid = {'alpha': np.logspace(-3, 5, 10),
                      'tol': np.logspace(-1, -10, 10),
                      'solver': ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']}


        results_grid = grid_search_forecaster(
                           forecaster         = forecaster,
                           y                  = y,
                           param_grid         = param_grid, # param_grid,
                           lags_grid          = lags_grid,
                           steps              = 1,
                           metric             = 'mean_squared_error',
                           refit              = False,
                           initial_train_size = len(y) - 1,
                           fixed_train_size   = False,
                           return_best        = True,
                           verbose            = False
                       )

        all_results = all_results.append(results_grid[:2])

    return all_results



In [28]:
# results = run_grid_search([consumption_building_1.Consumption, consumption_building_2.Consumption, consumption_building_3.Consumption, consumption_building_4.Consumption, consumption_building_5.Consumption])

In [29]:
nsload_data_path = osp.join(osp.dirname(competition_data.__file__), "load_data.csv")
df_loads = pd.read_csv(nsload_data_path)
df_load_1 = df_loads[:8760]["non_shiftable_load"].to_frame()
df_load_2 = df_loads[8760:8760*2]["non_shiftable_load"].to_frame()
df_load_3 = df_loads[8760*2:8760*3]["non_shiftable_load"].to_frame()
df_load_4 = df_loads[8760*3:8760*4]["non_shiftable_load"].to_frame()
df_load_5 = df_loads[8760*4:8760*5]["non_shiftable_load"].to_frame()

In [30]:
run_grid_search([df_load_1.non_shiftable_load, df_load_2.non_shiftable_load, df_load_3.non_shiftable_load, df_load_4.non_shiftable_load, df_load_5.non_shiftable_load], load_lags_grid)

Number of models compared: 10500.


loop lags_grid:   0%|                                              | 0/15 [00:00<?, ?it/s]
loop param_grid:   0%|                                            | 0/700 [00:00<?, ?it/s][A
loop param_grid:   0%|                                    | 2/700 [00:00<00:43, 16.06it/s][A
loop param_grid:   1%|▏                                   | 4/700 [00:00<00:43, 15.91it/s][A
loop param_grid:   1%|▎                                   | 6/700 [00:00<00:43, 15.88it/s][A
loop param_grid:   1%|▍                                   | 8/700 [00:00<00:43, 15.95it/s][A
loop param_grid:   1%|▌                                  | 10/700 [00:00<00:43, 15.95it/s][A
loop param_grid:   2%|▌                                  | 12/700 [00:00<00:47, 14.36it/s][A
loop param_grid:   2%|▋                                  | 14/700 [00:00<00:50, 13.53it/s][A
loop param_grid:   2%|▊                                  | 16/700 [00:01<00:51, 13.17it/s][A
loop param_grid:   3%|▉                                  | 18/7

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 23 24 25 26 27 28 29 30
 31 32 33 34 35 36 37 38 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60] 
  Parameters: {'alpha': 3.593813663804626, 'solver': 'sag', 'tol': 0.001}
  Backtesting metric: 1.6570807747339501e-06

Number of models compared: 10500.


loop lags_grid:   0%|                                              | 0/15 [00:00<?, ?it/s]
loop param_grid:   0%|                                            | 0/700 [00:00<?, ?it/s][A
loop param_grid:   0%|                                    | 2/700 [00:00<00:43, 16.13it/s][A
loop param_grid:   1%|▏                                   | 4/700 [00:00<00:44, 15.68it/s][A
loop param_grid:   1%|▎                                   | 6/700 [00:00<00:44, 15.74it/s][A
loop param_grid:   1%|▍                                   | 8/700 [00:00<00:45, 15.18it/s][A
loop param_grid:   1%|▌                                  | 10/700 [00:00<00:45, 15.04it/s][A
loop param_grid:   2%|▌                                  | 12/700 [00:00<00:50, 13.51it/s][A
loop param_grid:   2%|▋                                  | 14/700 [00:00<00:52, 12.95it/s][A
loop param_grid:   2%|▊                                  | 16/700 [00:01<00:54, 12.64it/s][A
loop param_grid:   3%|▉                                  | 18/7

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 23 24 25 26 27 28 29 30 31
 32 33 34 35 36 37 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59] 
  Parameters: {'alpha': 0.001, 'solver': 'sag', 'tol': 0.1}
  Backtesting metric: 0.003901924484038252

Number of models compared: 10500.


loop lags_grid:   0%|                                              | 0/15 [00:00<?, ?it/s]
loop param_grid:   0%|                                            | 0/700 [00:00<?, ?it/s][A
loop param_grid:   0%|                                    | 2/700 [00:00<00:38, 18.29it/s][A
loop param_grid:   1%|▏                                   | 4/700 [00:00<00:41, 16.87it/s][A
loop param_grid:   1%|▎                                   | 6/700 [00:00<00:42, 16.46it/s][A
loop param_grid:   1%|▍                                   | 8/700 [00:00<00:40, 17.14it/s][A
loop param_grid:   1%|▌                                  | 10/700 [00:00<00:41, 16.71it/s][A
loop param_grid:   2%|▌                                  | 12/700 [00:00<00:45, 15.14it/s][A
loop param_grid:   2%|▋                                  | 14/700 [00:00<00:46, 14.83it/s][A
loop param_grid:   2%|▊                                  | 16/700 [00:01<00:48, 14.11it/s][A
loop param_grid:   3%|▉                                  | 18/7

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1 23 45] 
  Parameters: {'alpha': 12915.496650148827, 'solver': 'sparse_cg', 'tol': 0.1}
  Backtesting metric: 5.158026788299217e-07

Number of models compared: 10500.


loop lags_grid:   0%|                                              | 0/15 [00:00<?, ?it/s]
loop param_grid:   0%|                                            | 0/700 [00:00<?, ?it/s][A
loop param_grid:   0%|                                    | 2/700 [00:00<00:43, 16.17it/s][A
loop param_grid:   1%|▏                                   | 4/700 [00:00<00:42, 16.25it/s][A
loop param_grid:   1%|▎                                   | 6/700 [00:00<00:42, 16.25it/s][A
loop param_grid:   1%|▍                                   | 8/700 [00:00<00:42, 16.24it/s][A
loop param_grid:   1%|▌                                  | 10/700 [00:00<00:42, 16.17it/s][A
loop param_grid:   2%|▌                                  | 12/700 [00:00<00:46, 14.65it/s][A
loop param_grid:   2%|▋                                  | 14/700 [00:00<00:49, 13.88it/s][A
loop param_grid:   2%|▊                                  | 16/700 [00:01<00:50, 13.43it/s][A
loop param_grid:   3%|▉                                  | 18/7

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1 23 45 67] 
  Parameters: {'alpha': 12915.496650148827, 'solver': 'sparse_cg', 'tol': 0.01}
  Backtesting metric: 8.272808043205157e-07

Number of models compared: 10500.


loop lags_grid:   0%|                                              | 0/15 [00:00<?, ?it/s]
loop param_grid:   0%|                                            | 0/700 [00:00<?, ?it/s][A
loop param_grid:   0%|                                    | 2/700 [00:00<00:44, 15.84it/s][A
loop param_grid:   1%|▏                                   | 4/700 [00:00<00:44, 15.61it/s][A
loop param_grid:   1%|▎                                   | 6/700 [00:00<00:44, 15.61it/s][A
loop param_grid:   1%|▍                                   | 8/700 [00:00<00:44, 15.50it/s][A
loop param_grid:   1%|▌                                  | 10/700 [00:00<00:45, 15.27it/s][A
loop param_grid:   2%|▌                                  | 12/700 [00:00<00:48, 14.09it/s][A
loop param_grid:   2%|▋                                  | 14/700 [00:00<00:50, 13.47it/s][A
loop param_grid:   2%|▊                                  | 16/700 [00:01<00:52, 13.00it/s][A
loop param_grid:   3%|▉                                  | 18/7

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [1 2 3 4 5] 
  Parameters: {'alpha': 27.825594022071257, 'solver': 'sag', 'tol': 0.1}
  Backtesting metric: 0.058074468732355715



Unnamed: 0,lags,params,mean_squared_error,alpha,solver,tol
332,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","{'alpha': 3.593813663804626, 'solver': 'sag', ...",1.657081e-06,3.593814,sag,0.001
402,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","{'alpha': 27.825594022071257, 'solver': 'sag',...",3.942054e-06,27.825594,sag,0.001
391,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","{'alpha': 27.825594022071257, 'solver': 'spars...",4.207972e-06,27.825594,sparse_cg,0.01
2850,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","{'alpha': 0.001, 'solver': 'sag', 'tol': 0.1}",0.003901924,0.001,sag,0.1
750,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","{'alpha': 0.001, 'solver': 'sag', 'tol': 0.1}",0.003901924,0.001,sag,0.1
2920,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","{'alpha': 0.007742636826811269, 'solver': 'sag...",0.00390237,0.007743,sag,0.1
8300,"[1, 23, 45]","{'alpha': 12915.496650148827, 'solver': 'spars...",5.158027e-07,12915.49665,sparse_cg,0.1
8291,"[1, 23, 45]","{'alpha': 12915.496650148827, 'solver': 'lsqr'...",5.158027e-07,12915.49665,lsqr,0.01
8301,"[1, 23, 45]","{'alpha': 12915.496650148827, 'solver': 'spars...",5.158027e-07,12915.49665,sparse_cg,0.01
6901,"[1, 23, 45, 67]","{'alpha': 12915.496650148827, 'solver': 'spars...",8.272808e-07,12915.49665,sparse_cg,0.01


In [33]:
nsload_data_path = osp.join(osp.dirname(competition_data.__file__), "solar_data.csv")
df_solars = pd.read_csv(nsload_data_path)
df_solar_1 = df_solars[:8760]["solar_generation"].to_frame()
df_solar_2 = df_solars[8760:8760*2]["solar_generation"].to_frame()
df_solar_3 = df_solars[8760*2:8760*3]["solar_generation"].to_frame()
df_solar_4 = df_solars[8760*3:8760*4]["solar_generation"].to_frame()
df_solar_5 = df_solars[8760*4:8760*5]["solar_generation"].to_frame()

In [None]:
run_grid_search([df_solar_1.solar_generation, df_solar_2.solar_generation, df_solar_3.solar_generation, df_solar_4.solar_generation, df_solar_5.solar_generation], solar_lags_grid)

Number of models compared: 10500.


loop lags_grid:   0%|                                              | 0/15 [00:00<?, ?it/s]
loop param_grid:   0%|                                            | 0/700 [00:00<?, ?it/s][A
loop param_grid:   0%|                                    | 2/700 [00:00<00:44, 15.80it/s][A
loop param_grid:   1%|▏                                   | 4/700 [00:00<00:44, 15.67it/s][A
loop param_grid:   1%|▎                                   | 6/700 [00:00<00:44, 15.52it/s][A
loop param_grid:   1%|▍                                   | 8/700 [00:00<00:44, 15.59it/s][A
loop param_grid:   1%|▌                                  | 10/700 [00:00<00:44, 15.64it/s][A
loop param_grid:   2%|▌                                  | 12/700 [00:00<00:50, 13.65it/s][A
loop param_grid:   2%|▋                                  | 14/700 [00:01<00:53, 12.73it/s][A
loop param_grid:   2%|▊                                  | 16/700 [00:01<00:56, 12.11it/s][A
loop param_grid:   3%|▉                                  | 18/7

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 23 24 25 26 27
 28 29 30 31 32 33 34 35 36 37 38 39 40 41 45 46 47 48 49 50 51 52 53 54
 55 56 57 58 59 60 61 62 63] 
  Parameters: {'alpha': 0.001, 'solver': 'sparse_cg', 'tol': 1e-06}
  Backtesting metric: 8.537524238849973e-17

Number of models compared: 10500.


loop lags_grid:   0%|                                              | 0/15 [00:00<?, ?it/s]
loop param_grid:   0%|                                            | 0/700 [00:00<?, ?it/s][A
loop param_grid:   0%|                                    | 2/700 [00:00<00:44, 15.55it/s][A
loop param_grid:   1%|▏                                   | 4/700 [00:00<00:45, 15.45it/s][A
loop param_grid:   1%|▎                                   | 6/700 [00:00<00:44, 15.59it/s][A
loop param_grid:   1%|▍                                   | 8/700 [00:00<00:43, 15.75it/s][A
loop param_grid:   1%|▌                                  | 10/700 [00:00<00:43, 15.81it/s][A
loop param_grid:   2%|▌                                  | 12/700 [00:00<00:49, 14.03it/s][A
loop param_grid:   2%|▋                                  | 14/700 [00:00<00:52, 13.18it/s][A
loop param_grid:   2%|▊                                  | 16/700 [00:01<00:54, 12.55it/s][A
loop param_grid:   3%|▉                                  | 18/7

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 23 24 25 26 27 28 29 30
 31 32 33 34 35 36 37 38 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60] 
  Parameters: {'alpha': 3.593813663804626, 'solver': 'sparse_cg', 'tol': 0.0001}
  Backtesting metric: 3.2171615244612926e-12

Number of models compared: 10500.


loop lags_grid:   0%|                                              | 0/15 [00:00<?, ?it/s]
loop param_grid:   0%|                                            | 0/700 [00:00<?, ?it/s][A
loop param_grid:   0%|                                    | 2/700 [00:00<00:42, 16.33it/s][A
loop param_grid:   1%|▏                                   | 4/700 [00:00<00:41, 16.96it/s][A
loop param_grid:   1%|▎                                   | 6/700 [00:00<00:42, 16.51it/s][A
loop param_grid:   1%|▍                                   | 8/700 [00:00<00:42, 16.30it/s][A
loop param_grid:   1%|▌                                  | 10/700 [00:00<00:40, 16.97it/s][A
loop param_grid:   2%|▌                                  | 12/700 [00:00<00:46, 14.68it/s][A
loop param_grid:   2%|▋                                  | 14/700 [00:00<00:50, 13.52it/s][A
loop param_grid:   2%|▊                                  | 16/700 [00:01<00:52, 13.05it/s][A
loop param_grid:   3%|▉                                  | 18/7

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1  2  3  4  5  6  7  8  9 10] 
  Parameters: {'alpha': 0.46415888336127775, 'solver': 'saga', 'tol': 0.01}
  Backtesting metric: 2.5895040234623336e-08

Number of models compared: 10500.


loop lags_grid:   0%|                                              | 0/15 [00:00<?, ?it/s]
loop param_grid:   0%|                                            | 0/700 [00:00<?, ?it/s][A
loop param_grid:   0%|                                    | 2/700 [00:00<00:43, 16.00it/s][A
loop param_grid:   1%|▏                                   | 4/700 [00:00<00:43, 16.13it/s][A
loop param_grid:   1%|▎                                   | 6/700 [00:00<00:43, 16.07it/s][A
loop param_grid:   1%|▍                                   | 8/700 [00:00<00:45, 15.28it/s][A
loop param_grid:   1%|▌                                  | 10/700 [00:00<00:44, 15.53it/s][A
loop param_grid:   2%|▌                                  | 12/700 [00:00<00:49, 14.03it/s][A
loop param_grid:   2%|▋                                  | 14/700 [00:00<00:50, 13.60it/s][A
loop param_grid:   2%|▊                                  | 16/700 [00:01<00:51, 13.34it/s][A
loop param_grid:   3%|▉                                  | 18/7