# Libraries and Data

In [1]:
#libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import ParameterGrid
from darts.timeseries import TimeSeries
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.dataprocessing.transformers import Scaler
from darts.models import RNNModel
from sklearn.metrics import mean_squared_error

  from tqdm.autonotebook import tqdm


In [2]:
# Read the historical data; the first CSV column (YYYY-MM-DD dates) becomes
# the index and is parsed into datetimes.
df = pd.read_csv(
    '../nyc_data.csv',
    parse_dates=True,
    index_col=0,
)
df.head(5)

Unnamed: 0_level_0,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,720.000885,0,0,0,3.68,41.305
2015-01-02,581.276773,0,0,0,4.73,131.574
2015-01-03,754.117039,0,0,0,7.23,162.7
2015-01-04,622.252774,0,0,0,10.96,160.281
2015-01-05,785.373319,0,0,0,6.92,51.077


In [3]:
# Give the target column the short name `y` used by the rest of the notebook
df = df.rename({'Demand': 'y'}, axis='columns')
# Show only the header row to confirm the new column names
df.iloc[:0]

Unnamed: 0_level_0,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


# Prepare for LSTM

In [4]:
# Wrap the pandas data in darts TimeSeries objects:
#   series     -> the target column `y`
#   covariates -> every column after the first one (the external regressors)
series = TimeSeries.from_series(df['y'])
covariates = TimeSeries.from_dataframe(
    df.iloc[:, 1:]
)

In [5]:
# Calendar features derived from the date index of the target series.

# Year as a single numeric component, over one timestamp per row of `df`
year_series = datetime_attribute_timeseries(
    pd.date_range(
        start=series.start_time(),
        periods=len(df),
        freq=series.freq_str,
    ),
    attribute="year",
    one_hot=False,
)

# Month, one-hot encoded, on the same time index as `year_series`
month_series = datetime_attribute_timeseries(
    year_series, attribute="month", one_hot=True
)

# Weekday, one-hot encoded, on the same time index as `year_series`
weekday_series = datetime_attribute_timeseries(
    year_series, attribute="weekday", one_hot=True
)

In [6]:
# Two independent scalers: transformer1 for the target, transformer2 for the covariates
transformer1, transformer2 = Scaler(), Scaler()

In [7]:
# Fit transformer1 on the target and keep the scaled series; the same
# transformer is used later to map forecasts back to the original scale.
y_transformed = transformer1.fit_transform(series=series)

In [8]:
# Scale the covariates
# The year component is stacked onto the regressors BEFORE scaling so it gets
# scaled with them. It goes into a separately named series instead of
# overwriting `covariates`, so re-running this cell does not stack a second
# year component onto an already-stacked input.
covariates_with_year = covariates.stack(year_series)

# The one-hot month/weekday components are appended AFTER scaling, i.e. they
# are not passed through transformer2.
covariates_transformed = (
    transformer2.fit_transform(covariates_with_year)
    .stack(month_series)
    .stack(weekday_series)
)

# LSTM

In [11]:
# Baseline LSTM configuration.
# API reference:
# https://unit8co.github.io/darts/generated_api/darts.models.forecasting.rnn_model.html
model = RNNModel(
    model="LSTM",
    hidden_dim=20,
    n_rnn_layers=2,
    dropout=0,
    n_epochs=20,
    optimizer_kwargs={"lr": 0.003},
    random_state=1502,  # fixed seed so runs are repeatable
    training_length=20,
    input_chunk_length=15,
    pl_trainer_kwargs={"accelerator": "cpu"},  # train on CPU
    save_checkpoints=True,
)

In [12]:
# Train on the scaled target, with the scaled regressors plus calendar
# features supplied as future covariates.
model.fit(
    series=y_transformed,
    future_covariates=covariates_transformed,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 1:  10%|█         | 7/68 [00:00<00:01, 40.50it/s, train_loss=0.0087]  



Epoch 19: 100%|██████████| 68/68 [00:01<00:00, 40.50it/s, train_loss=0.00256]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 68/68 [00:01<00:00, 40.35it/s, train_loss=0.00256]


RNNModel(model=LSTM, hidden_dim=20, n_rnn_layers=2, dropout=0, training_length=20, n_epochs=20, optimizer_kwargs={'lr': 0.003}, random_state=1502, input_chunk_length=15, pl_trainer_kwargs={'accelerator': 'cpu'}, save_checkpoints=True)

# Cross Validation

In [14]:
# Rolling-origin cross-validation over the last 180 observations:
# 31-step forecasts, origins 16 steps apart, model refit at every origin.
# last_points_only=False keeps each complete forecast, one entry per origin.
cv = model.historical_forecasts(
    series=y_transformed,
    future_covariates=covariates_transformed,
    start=len(df) - 180,
    forecast_horizon=31,
    stride=16,
    retrain=True,
    last_points_only=False,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 0:   0%|          | 0/63 [00:00<?, ?it/s] 

Epoch 19: 100%|██████████| 63/63 [00:01<00:00, 42.85it/s, train_loss=0.00247]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 63/63 [00:01<00:00, 42.65it/s, train_loss=0.00247]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 63/63 [00:01<00:00, 43.92it/s, train_loss=0.00274]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 63/63 [00:01<00:00, 43.72it/s, train_loss=0.00274]

GPU available: False, used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 64/64 [00:01<00:00, 43.97it/s, train_loss=0.00194]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 64/64 [00:01<00:00, 43.80it/s, train_loss=0.00194]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 64/64 [00:01<00:00, 35.44it/s, train_loss=0.00244]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 64/64 [00:01<00:00, 35.31it/s, train_loss=0.00244]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 65/65 [00:01<00:00, 36.36it/s, train_loss=0.00246]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 65/65 [00:01<00:00, 36.19it/s, train_loss=0.00246]

GPU available: False, used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 65/65 [00:01<00:00, 35.48it/s, train_loss=0.00281]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 65/65 [00:01<00:00, 35.33it/s, train_loss=0.00281]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 66/66 [00:01<00:00, 36.13it/s, train_loss=0.00266]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 66/66 [00:01<00:00, 35.98it/s, train_loss=0.00266]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 66/66 [00:01<00:00, 36.45it/s, train_loss=0.00272]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 66/66 [00:01<00:00, 36.30it/s, train_loss=0.00272]

GPU available: False, used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 67/67 [00:01<00:00, 37.33it/s, train_loss=0.00199]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 67/67 [00:01<00:00, 37.19it/s, train_loss=0.00199]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 67/67 [00:01<00:00, 36.43it/s, train_loss=0.00268]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 67/67 [00:01<00:00, 36.26it/s, train_loss=0.00268]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [23]:
# Score every cross-validation forecast on the original (unscaled) demand scale
rmse_cv = []

for fold in cv:
    # Undo the target scaling and convert the forecast to a pandas Series
    predictions = transformer1.inverse_transform(fold).pd_series()

    # Observed values over exactly the dates this forecast covers
    first_day, last_day = predictions.index.min(), predictions.index.max()
    actuals = df.y[first_day:last_day]

    # Root mean squared error of this forecast
    rmse_cv.append(np.sqrt(mean_squared_error(actuals, predictions)))


In [25]:
# Average RMSE across all cross-validation forecasts collected in `rmse_cv`
np.mean(rmse_cv)

123.20455876728063

# Parameter Tuning

In [9]:
# Round-1 search space: vary the network shape (layers, hidden size), dropout
# and epoch count; learning rate and window lengths are held fixed.
param_grid = dict(
    n_rnn_layers=[2, 4],
    hidden_dim=[20, 30],
    dropout=[0, 0.2],
    n_epochs=[10, 20],
    lr=[0.003],
    training_length=[20],
    input_chunk_length=[15],
)

grid = ParameterGrid(param_grid=param_grid)
# Number of parameter combinations that will be evaluated
len(grid)

16

In [None]:
# Parameter tuning loop
# Each combination in `grid` is fitted and then scored with rolling-origin
# cross-validation. `rmse` receives one average RMSE per combination, in grid
# order, so it can be attached to a DataFrame built from `grid` afterwards.
rmse = []
for j, params in enumerate(grid, start=1):
    print(f"Loop number {j}", end='\n**********\n')

    # Build the model for this parameter combination
    model = RNNModel(
        model="LSTM",
        hidden_dim=params['hidden_dim'],
        n_rnn_layers=params['n_rnn_layers'],
        dropout=params['dropout'],
        n_epochs=params['n_epochs'],
        optimizer_kwargs={'lr': params['lr']},
        random_state=1502,
        training_length=params['training_length'],
        input_chunk_length=params['input_chunk_length'],
        pl_trainer_kwargs={"accelerator": "cpu"},
        save_checkpoints=True,
    )

    # Fit the model
    model.fit(y_transformed,
              future_covariates=covariates_transformed)

    print("**********\nENTERING CV LOOP", end='\n**********\n')

    # CV: 31-step forecasts over the last 180 observations, origins 16 steps
    # apart, refitting at every origin and keeping each complete forecast
    cv = model.historical_forecasts(
        y_transformed,
        future_covariates=covariates_transformed,
        start=df.shape[0] - 180,
        forecast_horizon=31,
        stride=16,
        retrain=True,
        last_points_only=False,
    )

    # Score every forecast on the original (unscaled) demand scale
    rmse_cv = []
    for fold in cv:
        # Transform predictions back and convert to a pandas Series
        predictions = TimeSeries.pd_series(transformer1.inverse_transform(fold))

        # Actuals over exactly the dates this forecast covers
        start = predictions.index.min()
        end = predictions.index.max()
        actuals = df.y[start:end]

        # Compute and save the error
        error = np.sqrt(mean_squared_error(actuals, predictions))
        rmse_cv.append(error)

    # Store the average error for this parameter combination
    avg_err = np.mean(rmse_cv)
    rmse.append(avg_err)

In [12]:
# One row per parameter combination, with its average CV RMSE alongside
tuning_results = pd.DataFrame(grid).assign(rmse=rmse)
tuning_results

Unnamed: 0,dropout,hidden_dim,input_chunk_length,lr,n_epochs,n_rnn_layers,training_length,rmse
0,0.0,20,15,0.003,10,2,20,109.263032
1,0.0,20,15,0.003,10,4,20,129.920411
2,0.0,20,15,0.003,20,2,20,123.204559
3,0.0,20,15,0.003,20,4,20,138.848991
4,0.0,30,15,0.003,10,2,20,128.426728
5,0.0,30,15,0.003,10,4,20,154.475845
6,0.0,30,15,0.003,20,2,20,135.376757
7,0.0,30,15,0.003,20,4,20,158.052058
8,0.2,20,15,0.003,10,2,20,102.729418
9,0.2,20,15,0.003,10,4,20,107.747661


In [13]:
# Keep the row(s) with the lowest RMSE and flip them so that parameter names
# become the index (one column per best row).
best_params = tuning_results.loc[
    tuning_results['rmse'].eq(tuning_results['rmse'].min())
].T
best_params

Unnamed: 0,8
dropout,0.2
hidden_dim,20.0
input_chunk_length,15.0
lr,0.003
n_epochs,10.0
n_rnn_layers,2.0
training_length,20.0
rmse,102.729418


In [14]:
# Get them
# `best_params` has the parameter names as its index and one column per
# best-scoring row, so `.loc[name]` returns a Series. Take its first element
# explicitly with `.iloc[0]`: calling int()/float() directly on a
# single-element Series is deprecated in pandas and fails outright if several
# rows tie for the minimum RMSE.
n_rnn_layers = int(best_params.loc['n_rnn_layers'].iloc[0])
hidden_dim = int(best_params.loc['hidden_dim'].iloc[0])
dropout = float(best_params.loc['dropout'].iloc[0])

  n_rnn_layers = int(best_params.loc['n_rnn_layers'])
  hidden_dim = int(best_params.loc['hidden_dim'])
  dropout = float(best_params.loc['dropout'])


### Round 2 Parameter Tuning

In [15]:
# Round-2 search space: the architecture values selected in round 1 are held
# fixed, while epochs, learning rate and the two window lengths are varied.
param_grid = dict(
    n_rnn_layers=[n_rnn_layers],
    hidden_dim=[hidden_dim],
    dropout=[dropout],
    n_epochs=[10, 20],
    lr=[0.003, 0.001],
    training_length=[20, 30],
    input_chunk_length=[15, 20],
)

grid = ParameterGrid(param_grid=param_grid)
# Number of parameter combinations that will be evaluated
len(grid)

16

In [None]:
# Parameter tuning loop
# Each combination in `grid` is fitted and then scored with rolling-origin
# cross-validation. `rmse` receives one average RMSE per combination, in grid
# order, so it can be attached to a DataFrame built from `grid` afterwards.
rmse = []
for j, params in enumerate(grid, start=1):
    print(f"Loop number {j}", end='\n**********\n')

    # Build the model for this parameter combination
    model = RNNModel(
        model="LSTM",
        hidden_dim=params['hidden_dim'],
        n_rnn_layers=params['n_rnn_layers'],
        dropout=params['dropout'],
        n_epochs=params['n_epochs'],
        optimizer_kwargs={'lr': params['lr']},
        random_state=1502,
        training_length=params['training_length'],
        input_chunk_length=params['input_chunk_length'],
        pl_trainer_kwargs={"accelerator": "cpu"},
        save_checkpoints=True,
    )

    # Fit the model
    model.fit(y_transformed,
              future_covariates=covariates_transformed)

    print("**********\nENTERING CV LOOP", end='\n**********\n')

    # CV: 31-step forecasts over the last 180 observations, origins 16 steps
    # apart, refitting at every origin and keeping each complete forecast
    cv = model.historical_forecasts(
        y_transformed,
        future_covariates=covariates_transformed,
        start=df.shape[0] - 180,
        forecast_horizon=31,
        stride=16,
        retrain=True,
        last_points_only=False,
    )

    # Score every forecast on the original (unscaled) demand scale
    rmse_cv = []
    for fold in cv:
        # Transform predictions back and convert to a pandas Series
        predictions = TimeSeries.pd_series(transformer1.inverse_transform(fold))

        # Actuals over exactly the dates this forecast covers
        start = predictions.index.min()
        end = predictions.index.max()
        actuals = df.y[start:end]

        # Compute and save the error
        error = np.sqrt(mean_squared_error(actuals, predictions))
        rmse_cv.append(error)

    # Store the average error for this parameter combination
    avg_err = np.mean(rmse_cv)
    rmse.append(avg_err)

In [18]:
# One row per parameter combination, with its average CV RMSE alongside
tuning_results = pd.DataFrame(grid).assign(rmse=rmse)
tuning_results

Unnamed: 0,dropout,hidden_dim,input_chunk_length,lr,n_epochs,n_rnn_layers,training_length,rmse
0,0.2,20,15,0.003,10,2,20,102.729418
1,0.2,20,15,0.003,10,2,30,107.650144
2,0.2,20,15,0.003,20,2,20,110.425098
3,0.2,20,15,0.003,20,2,30,100.714687
4,0.2,20,15,0.001,10,2,20,128.727663
5,0.2,20,15,0.001,10,2,30,126.128145
6,0.2,20,15,0.001,20,2,20,97.125527
7,0.2,20,15,0.001,20,2,30,106.216988
8,0.2,20,20,0.003,10,2,20,101.286304
9,0.2,20,20,0.003,10,2,30,105.513794


In [20]:
# Keep the row(s) with the lowest RMSE, flip them so parameter names become
# the index, and save the result for the forecasting section to read back.
best_params = tuning_results.loc[
    tuning_results['rmse'].eq(tuning_results['rmse'].min())
].T
best_params.to_csv(path_or_buf="Best Params LSTM.csv")

# LSTM Forecasting Model!

## Libraries and Data

In [None]:
#libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import ParameterGrid
from darts.timeseries import TimeSeries
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.dataprocessing.transformers import Scaler
from darts.models import RNNModel
from sklearn.metrics import mean_squared_error

In [44]:
# Read the history and the future-period file; in both, the first CSV column
# becomes the index and is parsed into datetimes.
df = pd.read_csv('../nyc_data.csv', parse_dates=True, index_col=0)
future_df = pd.read_csv('../future.csv', parse_dates=True, index_col=0)
future_df.head(5)

Unnamed: 0_level_0,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01,,0,0,0,5.0,154.221
2021-01-02,,0,0,0,11.11,264.805
2021-01-03,,0,0,0,3.89,115.499
2021-01-04,,0,0,0,6.67,124.65
2021-01-05,,0,0,0,5.56,77.968


In [45]:
# Regressors = every column after the first one, for history and future alike
X_train, X_future = df.iloc[:, 1:], future_df.iloc[:, 1:]
X_train

Unnamed: 0_level_0,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-01,0,0,0,3.68,41.305
2015-01-02,0,0,0,4.73,131.574
2015-01-03,0,0,0,7.23,162.700
2015-01-04,0,0,0,10.96,160.281
2015-01-05,0,0,0,6.92,51.077
...,...,...,...,...,...
2020-12-27,0,0,0,2.89,38.674
2020-12-28,0,0,0,8.83,166.712
2020-12-29,0,0,0,3.48,161.865
2020-12-30,0,0,0,5.97,179.634


In [46]:
# Append the future regressors below the historical ones (row-wise)
X = pd.concat((X_train, X_future), axis=0)
X

Unnamed: 0_level_0,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-01,0,0,0,3.68,41.305
2015-01-02,0,0,0,4.73,131.574
2015-01-03,0,0,0,7.23,162.700
2015-01-04,0,0,0,10.96,160.281
2015-01-05,0,0,0,6.92,51.077
...,...,...,...,...,...
2021-01-27,0,0,0,3.33,39.664
2021-01-28,0,0,0,1.67,195.314
2021-01-29,0,0,0,-2.78,235.894
2021-01-30,0,0,0,1.11,152.752


In [47]:
# Give the target column the short name `y`
df = df.rename({"Demand": 'y'}, axis='columns')
# Show only the header row to confirm the new column names
df.iloc[:0]

Unnamed: 0_level_0,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


## Prepare for LSTM

In [48]:
# Wrap the pandas data in darts TimeSeries objects:
#   series_f     -> the target column `y` from `df`
#   covariates_f -> the combined regressors in `X`
series_f = TimeSeries.from_series(df['y'])
covariates_f = TimeSeries.from_dataframe(
    X
)

In [53]:
# Calendar features, one timestamp per row of `X`.

# Year as a single numeric component
year_series_f = datetime_attribute_timeseries(
    pd.date_range(
        start=series_f.start_time(),
        periods=len(X),
        freq=series_f.freq_str,
    ),
    attribute="year",
    one_hot=False,
)

# Month, one-hot encoded, on the same time index as `year_series_f`
month_series_f = datetime_attribute_timeseries(
    year_series_f, attribute='month', one_hot=True
)

# Weekday, one-hot encoded, on the same time index as `year_series_f`
weekday_series_f = datetime_attribute_timeseries(
    year_series_f, attribute='weekday', one_hot=True
)


In [54]:
# Two independent scalers: transformer1f for the target, transformer2f for the covariates
transformer1f, transformer2f = Scaler(), Scaler()

In [55]:
# Fit transformer1f on the target and keep the scaled series; the same
# transformer is used later to map the forecast back to the original scale.
y_transformed_f = transformer1f.fit_transform(
    series=series_f,
)


In [56]:
# Scale the covariates
# The year component is stacked onto the regressors BEFORE scaling so it gets
# scaled with them. It goes into a separately named series instead of
# overwriting `covariates_f`, so re-running this cell does not stack a second
# year component onto an already-stacked input.
covariates_with_year_f = covariates_f.stack(year_series_f)

# The one-hot month/weekday components are appended AFTER scaling, i.e. they
# are not passed through transformer2f.
covariates_transformed_f = (
    transformer2f.fit_transform(covariates_with_year_f)
    .stack(month_series_f)
    .stack(weekday_series_f)
)

## LSTM

In [57]:
# Read back the tuned parameters saved by the tuning section; the first CSV
# column (the parameter names) becomes the index.
parameters = pd.read_csv(
    "Best Params LSTM.csv",
    index_col=0,
)
parameters

Unnamed: 0,14
dropout,0.2
hidden_dim,20.0
input_chunk_length,20.0
lr,0.001
n_epochs,20.0
n_rnn_layers,2.0
training_length,20.0
rmse,96.17114


In [58]:
# `parameters` is indexed by parameter name, so `.loc[name]` returns a Series
# keyed by the column label(s) of the CSV. Use `.iloc[0]` to take the first
# value by position: `Series[0]` on a non-integer index relies on a positional
# fallback that pandas has deprecated.
# Values are read back from CSV as floats, hence the int() casts.
n_rnn_layers = int(parameters.loc['n_rnn_layers'].iloc[0])
dropout = float(parameters.loc['dropout'].iloc[0])
hidden_dim = int(parameters.loc['hidden_dim'].iloc[0])
input_chunk_length = int(parameters.loc['input_chunk_length'].iloc[0])
lr = float(parameters.loc['lr'].iloc[0])
n_epochs = int(parameters.loc['n_epochs'].iloc[0])
training_length = int(parameters.loc['training_length'].iloc[0])

  n_rnn_layers = int(parameters.loc['n_rnn_layers'][0])
  dropout = float(parameters.loc['dropout'][0])
  hidden_dim = int(parameters.loc['hidden_dim'][0])
  input_chunk_length = int(parameters.loc['input_chunk_length'][0])
  lr = float(parameters.loc['lr'][0])
  n_epochs = int(parameters.loc['n_epochs'][0])
  training_length = int(parameters.loc['training_length'][0])


In [60]:
# Final LSTM, configured with the tuned values read from the CSV above
model = RNNModel(
    model='LSTM',
    hidden_dim=hidden_dim,
    input_chunk_length=input_chunk_length,
    n_rnn_layers=n_rnn_layers,
    dropout=dropout,
    n_epochs=n_epochs,
    optimizer_kwargs={'lr': lr},
    random_state=1502,  # fixed seed so runs are repeatable
    training_length=training_length,
    pl_trainer_kwargs={"accelerator": "cpu"},  # train on CPU
    save_checkpoints=True,
)

In [61]:
# Train on the scaled target; the covariate series built from `X` (history
# plus future rows) is supplied as future covariates.
model.fit(
    series=y_transformed_f,
    future_covariates=covariates_transformed_f,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.3 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.3 K     Trainable params
0         Non-trainable params
7.3 K     Total params
0.029     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 68/68 [00:01<00:00, 34.07it/s, train_loss=0.00349]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 68/68 [00:02<00:00, 33.93it/s, train_loss=0.00349]


RNNModel(model=LSTM, hidden_dim=20, n_rnn_layers=2, dropout=0.2, training_length=20, input_chunk_length=20, n_epochs=20, optimizer_kwargs={'lr': 0.001}, random_state=1502, pl_trainer_kwargs={'accelerator': 'cpu'}, save_checkpoints=True)

## Predictions and Exporting

In [64]:
# Forecast one step per row of `future_df`, map the result back to the
# original scale, and expose it as a pandas Series named "LSTM".
preds_f = (
    transformer1f
    .inverse_transform(
        model.predict(n=len(future_df),
                      future_covariates=covariates_transformed_f)
    )
    .pd_series()
    .rename("LSTM")
)
preds_f

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 45.92it/s]


Date
2021-01-01    742.949272
2021-01-02    869.389065
2021-01-03    768.538369
2021-01-04    906.973984
2021-01-05    768.105942
2021-01-06    929.918020
2021-01-07    811.031269
2021-01-08    728.682587
2021-01-09    853.326997
2021-01-10    768.273978
2021-01-11    910.168396
2021-01-12    775.780465
2021-01-13    915.733838
2021-01-14    818.456157
2021-01-15    722.115982
2021-01-16    857.137383
2021-01-17    768.446891
2021-01-18    909.265821
2021-01-19    765.032043
2021-01-20    916.460758
2021-01-21    810.705656
2021-01-22    710.672690
2021-01-23    838.071734
2021-01-24    759.320251
2021-01-25    883.503061
2021-01-26    758.559462
2021-01-27    897.133170
2021-01-28    799.197280
2021-01-29    713.782382
2021-01-30    845.118335
2021-01-31    761.673001
Freq: D, Name: LSTM, dtype: float64

In [65]:
# Write the forecast Series (date index plus the "LSTM" column) to disk
preds_f.to_csv(path_or_buf="predictions_LSTM.csv")