In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import ParameterGrid
from darts.timeseries import TimeSeries
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.dataprocessing.transformers import Scaler

In [None]:
# Read the data set (first CSV column becomes the index, with date parsing enabled)
# and expose the "Demand" column under the shorter name "y".
df = (
    pd.read_csv("nyc_data.csv", index_col=0, parse_dates=True)
    .rename(columns={"Demand": "y"})
)
df.head(1)

In [None]:
# Target: the "y" column wrapped in a darts TimeSeries.
target_column = df.y
series = TimeSeries.from_series(target_column)

# Covariates: every column from position 1 onwards.
# NOTE(review): this presumes "y" sits at column position 0 - confirm against the CSV layout.
covariate_frame = df.iloc[:, 1:]
covariates = TimeSeries.from_dataframe(covariate_frame)

In [None]:
# Date index that starts where the target starts, uses the target's frequency
# and has one timestamp per row of df.
calendar_index = pd.date_range(start=series.start_time(),
                               periods=len(df),
                               freq=series.freq_str)

# Year as a single numeric component (no one-hot encoding).
year_series = datetime_attribute_timeseries(calendar_index,
                                            attribute='year',
                                            one_hot=False)

# Month and weekday as one-hot encoded components, built on the time axis of year_series.
month_series, weekday_series = (
    datetime_attribute_timeseries(year_series, attribute=calendar_attribute, one_hot=True)
    for calendar_attribute in ('month', 'weekday')
)

In [None]:
# Scaling: two independent scalers, so each series they are fitted on
# gets its own scaling parameters.
transformer1, transformer2 = Scaler(), Scaler()

In [None]:
# Append the year component to the covariates and scale the result with transformer2.
# NOTE(review): `covariates` is rebound to its own stacked version here, so running
# this cell a second time would try to stack the year component again.
covariates = covariates.stack(year_series)
covariates_transformed = transformer2.fit_transform(covariates)

# Scale the target with its own scaler; transformer1 is what later maps
# forecasts back to the original scale.
y_transformed = transformer1.fit_transform(series)

In [None]:
# Add the one-hot month and weekday components after scaling, so they are
# not passed through transformer2.
covariates_transformed = (
    covariates_transformed
    .stack(month_series)
    .stack(weekday_series)
)

### LSTM Model

In [None]:
from darts.models import RNNModel

In [None]:
# Baseline LSTM configuration, gathered in one place before building the model.
lstm_settings = {
    'model': 'LSTM',
    'hidden_dim': 20,
    'n_rnn_layers': 2,
    'dropout': 0.2,
    'n_epochs': 10,
    'optimizer_kwargs': {'lr': 0.003},
    'random_state': 42,
    'training_length': 20,
    'input_chunk_length': 15,
    # Training is pinned to the first GPU.
    'pl_trainer_kwargs': {"accelerator": "gpu", "devices": [0]},
}
model = RNNModel(**lstm_settings)

In [None]:
# Train on the scaled target, with the scaled/encoded covariates as future covariates.
model.fit(series=y_transformed, future_covariates=covariates_transformed)

#### Cross Validation

In [None]:
# Backtest over the tail of the data: the start is expressed as a row position,
# 180 rows before the end of df.
backtest_start = df.shape[0] - 180

# last_points_only=False: `cv` is later iterated as a collection of forecasts.
cv = model.historical_forecasts(
    series=y_transformed,
    future_covariates=covariates_transformed,
    start=backtest_start,
    forecast_horizon=31,
    stride=16,
    retrain=True,
    last_points_only=False,
)

In [None]:
from sklearn.metrics import mean_squared_error

# RMSE of every cross-validation forecast, measured on the original (unscaled) scale
rmse_cv = []

for forecast in cv:

  # Undo the target scaling and convert the forecast to a pandas Series
  predictions = transformer1.inverse_transform(forecast).pd_series()

  # Actual values over the forecast window (label-based slice, both ends inclusive)
  start = predictions.index.min()
  end = predictions.index.max()
  actuals = df.y.loc[start:end]

  # RMSE = sqrt(MSE). The `squared=False` shortcut of mean_squared_error was
  # deprecated in scikit-learn 1.4 and removed in 1.6, so take the root explicitly.
  error_cv = np.sqrt(mean_squared_error(actuals, predictions))

  # save the error
  rmse_cv.append(error_cv)

print(f"The RMSE is {np.mean(rmse_cv)}")

#### Parameter Tuning 1

In [None]:
# Grid: the first pass varies the architecture (layers, hidden size, dropout)
# and the epoch count; learning rate and window lengths hold a single value.
param_grid = dict(
    n_rnn_layers=[1, 2],
    hidden_dim=[10, 20],
    dropout=[0.1, 0.2],
    n_epochs=[10, 20],
    lr=[0.003],
    training_length=[20],
    input_chunk_length=[15],
)
grid = ParameterGrid(param_grid)

# Number of combinations to evaluate
len(grid)

In [None]:
# Parameter Tuning: mean cross-validated RMSE for every combination in `grid`
rmse = []

# Loop
for params in grid:
  # Build the LSTM model. Every tunable value is read from the grid entry so
  # that values listed in param_grid are never silently ignored (lr,
  # training_length and input_chunk_length used to be hard-coded here).
  model = RNNModel(model = "LSTM",
                   hidden_dim = params['hidden_dim'],
                   n_rnn_layers = params['n_rnn_layers'],
                   dropout = params['dropout'],
                   n_epochs = params['n_epochs'],
                   optimizer_kwargs = {"lr": params['lr']},
                   random_state = 42,
                   training_length = params['training_length'],
                   input_chunk_length = params['input_chunk_length'],
                   pl_trainer_kwargs = {"accelerator": "gpu", "devices": [0]}
                   )

  # Fit the model to the data
  model.fit(y_transformed,
            future_covariates = covariates_transformed)

  # CV
  cv = model.historical_forecasts(series = y_transformed,
                                  future_covariates = covariates_transformed,
                                  start = df.shape[0] - 180,
                                  forecast_horizon = 31,
                                  stride = 16,
                                  retrain = True,
                                  last_points_only = False)

  # RMSE of each forecast, measured on the original (unscaled) scale
  rmse_cv = []

  for forecast in cv:

    # Undo the target scaling and convert the forecast to a pandas Series
    predictions = transformer1.inverse_transform(forecast).pd_series()

    # Actual values over the forecast window (label-based slice, both ends inclusive)
    start = predictions.index.min()
    end = predictions.index.max()
    actuals = df.y.loc[start:end]

    # RMSE = sqrt(MSE). The `squared=False` shortcut of mean_squared_error was
    # deprecated in scikit-learn 1.4 and removed in 1.6, so take the root explicitly.
    error_cv = np.sqrt(mean_squared_error(actuals, predictions))

    # save the error
    rmse_cv.append(error_cv)

  # One score per parameter combination, in grid order
  error = np.mean(rmse_cv)
  rmse.append(error)

In [None]:
# Parameter Tuning outcome: one row per grid combination, with its mean CV RMSE
# attached (rmse was filled in grid iteration order).
tuning_results = pd.DataFrame(grid).assign(rmse=rmse)
tuning_results

In [None]:
# Exporting the tuned parameters: the row with the lowest RMSE, transposed so
# that the parameter names become the index.
# idxmin returns a single label even when several combinations tie on the
# minimum, so best_params always has exactly one column (an equality filter
# would return one column per tied row).
best_params = tuning_results.loc[[tuning_results.rmse.idxmin()]].transpose()
best_params

In [None]:
# Isolate the params.
# best_params.loc[<name>] is a Series (one entry per selected row). Take its
# first entry explicitly: calling int()/float() on a Series is deprecated in
# recent pandas and fails outright when the Series has more than one entry.
n_rnn_layers = int(best_params.loc['n_rnn_layers'].iloc[0])
hidden_dim = int(best_params.loc['hidden_dim'].iloc[0])
dropout = float(best_params.loc['dropout'].iloc[0])

#### Parameter Tuning 2

In [None]:
# Grid: the second pass keeps the architecture found in the first pass fixed
# and varies the epoch count, learning rate and window lengths.
param_grid = dict(
    n_rnn_layers=[n_rnn_layers],
    hidden_dim=[hidden_dim],
    dropout=[dropout],
    n_epochs=[10, 20],
    lr=[0.001, 0.003],
    training_length=[20, 30],
    input_chunk_length=[15, 20],
)
grid = ParameterGrid(param_grid)

# Number of combinations to evaluate
len(grid)

In [None]:
# Parameter Tuning: mean cross-validated RMSE for every combination in `grid`
rmse = []

# Loop
for params in grid:
  # Build the LSTM model from the current grid entry
  # NOTE(review): this pass seeds with 1502 while the first tuning pass uses 42 -
  # confirm the difference is intentional before comparing scores across passes.
  model = RNNModel(model = "LSTM",
                   hidden_dim = params['hidden_dim'],
                   n_rnn_layers = params['n_rnn_layers'],
                   dropout = params['dropout'],
                   n_epochs = params['n_epochs'],
                   optimizer_kwargs = {"lr": params['lr']},
                   random_state = 1502,
                   training_length = params['training_length'],
                   input_chunk_length = params['input_chunk_length'],
                   pl_trainer_kwargs = {"accelerator": "gpu", "devices": [0]}
                   )

  # Fit the model to the data
  model.fit(y_transformed,
            future_covariates = covariates_transformed)

  # CV
  cv = model.historical_forecasts(series = y_transformed,
                                  future_covariates = covariates_transformed,
                                  start = df.shape[0] - 180,
                                  forecast_horizon = 31,
                                  stride = 16,
                                  retrain = True,
                                  last_points_only = False)

  # RMSE of each forecast, measured on the original (unscaled) scale
  rmse_cv = []

  for forecast in cv:

    # Undo the target scaling and convert the forecast to a pandas Series
    predictions = transformer1.inverse_transform(forecast).pd_series()

    # Actual values over the forecast window (label-based slice, both ends inclusive)
    start = predictions.index.min()
    end = predictions.index.max()
    actuals = df.y.loc[start:end]

    # RMSE = sqrt(MSE). The `squared=False` shortcut of mean_squared_error was
    # deprecated in scikit-learn 1.4 and removed in 1.6, so take the root explicitly.
    error_cv = np.sqrt(mean_squared_error(actuals, predictions))

    # save the error
    rmse_cv.append(error_cv)

  # One score per parameter combination, in grid order
  error = np.mean(rmse_cv)
  rmse.append(error)

In [None]:
# Parameter Tuning outcome: one row per grid combination, with its mean CV RMSE
# attached (rmse was filled in grid iteration order).
tuning_results = pd.DataFrame(grid).assign(rmse=rmse)
tuning_results

In [None]:
# Row with the lowest RMSE, transposed so the parameter names become the index.
# idxmin returns a single label even when several combinations tie on the
# minimum, so best_params always has exactly one column.
best_params = tuning_results.loc[[tuning_results.rmse.idxmin()]].transpose()