# Libraries and Data

In [1]:
# Mount Google Drive at /content/drive so that the project folder used by the
# following cells is reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Change the working directory to the project folder on the mounted Drive.
# Later cells read and write files with paths relative to this folder.
%cd /content/drive/MyDrive/Time Series Forecasting Product

/content/drive/MyDrive/Time Series Forecasting Product


In [3]:
!pip install darts

Collecting darts
  Downloading darts-0.26.0-py3-none-any.whl (784 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m784.8/784.8 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
Collecting nfoursid>=1.0.0 (from darts)
  Downloading nfoursid-1.0.1-py3-none-any.whl (16 kB)
Collecting pmdarima>=1.8.0 (from darts)
  Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyod>=0.9.5 (from darts)
  Downloading pyod-1.1.1.tar.gz (159 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.4/159.4 kB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting shap>=0.40.0 (from darts)
  Downloading shap-0.43.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (532 k

In [4]:
#libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import ParameterGrid

In [5]:
# Load the historical and the future-period data.
# In both files the first column holds the dates (YYYY-MM-DD); it is parsed
# and used as the index.
df, future_df = (
    pd.read_csv(csv_name, index_col=0, parse_dates=True)
    for csv_name in ('nyc_data.csv', 'future.csv')
)

In [7]:
# Preview the first rows of the future-period frame
future_df.head()

Unnamed: 0_level_0,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01,,0,0,0,5.0,154.221
2021-01-02,,0,0,0,11.11,264.805
2021-01-03,,0,0,0,3.89,115.499
2021-01-04,,0,0,0,6.67,124.65
2021-01-05,,0,0,0,5.56,77.968


In [8]:
# Regressors = every column after the first one (the first column is the target)
X_train, X_future = (frame.iloc[:, 1:] for frame in (df, future_df))

# Append the future regressors below the historical ones
X = pd.concat([X_train, X_future], axis=0)

In [9]:
# Rename the target column 'Demand' to 'y'
df = df.rename({'Demand': 'y'}, axis='columns')
# Show only the header row to confirm the new column name
df.iloc[:0]

Unnamed: 0_level_0,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


# Prepare for LSTM

In [10]:
# Wrap the pandas objects as Darts TimeSeries
from darts.timeseries import TimeSeries

# Regressors (historical + future rows of X)
covariates = TimeSeries.from_dataframe(X)
# Target (column 'y' of the historical frame)
series = TimeSeries.from_series(df['y'])

In [18]:
# Calendar features for seasonality / trend
from darts.utils.timeseries_generation import datetime_attribute_timeseries

# Year as a single (not one-hot) column. The index starts at the first
# timestamp of the target series, uses its frequency, and has one entry per
# row of X (history + future).
year_series = datetime_attribute_timeseries(
    pd.date_range(start=series.start_time(),
                  periods=X.shape[0],
                  freq=series.freq_str),
    attribute="year",
    one_hot=False,
)

# Month and weekday as one-hot columns, generated on year_series' time axis
month_series, weekday_series = (
    datetime_attribute_timeseries(year_series, attribute=attr, one_hot=True)
    for attr in ("month", "weekday")
)


In [19]:
# Scaling
from darts.dataprocessing.transformers import Scaler

# Two separate scalers: transformer1 for the target, transformer2 for the covariates
transformer1, transformer2 = Scaler(), Scaler()

In [20]:
# Scaling Y
# Fit transformer1 on the target series and scale it in one step. The same
# fitted transformer1 is used later to invert the scaling of the forecast.
y_transformed = transformer1.fit_transform(series)

In [21]:
# Stacking the covariates
# Add the year column to the regressors. The series is rebuilt from X instead
# of stacking onto `covariates` itself, so re-running this cell cannot stack
# the year component a second time.
covariates = TimeSeries.from_dataframe(X).stack(year_series)

In [22]:
# Scaling the Covariates
# Fit transformer2 on the regressors + year column and scale them. The one-hot
# month/weekday columns are stacked afterwards and are not passed through
# this scaler.
covariates_transformed = transformer2.fit_transform(covariates)

In [23]:
# Stack the Seasonal variables
# Start from the scaled base covariates (transformer2 was fitted in the
# previous cell) rather than stacking onto covariates_transformed itself, so
# re-running this cell does not stack the month/weekday columns again.
covariates_transformed = (
    transformer2.transform(covariates)
    .stack(month_series)
    .stack(weekday_series)
)

# LSTM Model

In [24]:
# Load the tuned parameters
# The first CSV column (the parameter names) becomes the index; the path is
# relative to the current working directory.
parameters = pd.read_csv("Forecasting Product/best_params_lstm.csv",
                         index_col = 0)
parameters

Unnamed: 0,12
dropout,0.1
hidden_dim,20.0
input_chunk_length,20.0
lr,0.003
n_epochs,10.0
n_rnn_layers,1.0
training_length,20.0
rmse,95.303638


In [25]:
# Isolate the params
# Take the single value column as a Series so that each lookup below yields a
# scalar. parameters.loc[name] would return a one-element Series, and calling
# int()/float() on such a Series is deprecated in recent pandas versions.
best_params = parameters.iloc[:, 0]

# The values are stored as floats in the CSV (e.g. 20.0); the count/size
# settings are cast to int.
n_rnn_layers = int(best_params['n_rnn_layers'])
hidden_dim = int(best_params['hidden_dim'])
dropout = float(best_params['dropout'])
lr = float(best_params['lr'])
input_chunk_length = int(best_params['input_chunk_length'])
n_epochs = int(best_params['n_epochs'])
training_length = int(best_params['training_length'])

In [26]:
# Build the LSTM model
# https://unit8co.github.io/darts/generated_api/darts.models.forecasting.rnn_model.html
import torch
from darts.models import RNNModel

# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the notebook still runs on a runtime without a GPU.
if torch.cuda.is_available():
    trainer_kwargs = {"accelerator": "gpu", "devices": [0]}
else:
    trainer_kwargs = {"accelerator": "cpu"}

# Hyperparameters come from the tuned-parameter file loaded above;
# random_state fixes the seed so training is repeatable.
model = RNNModel(model = "LSTM",
                 hidden_dim = hidden_dim,
                 n_rnn_layers = n_rnn_layers,
                 dropout = dropout,
                 n_epochs = n_epochs,
                 optimizer_kwargs = {"lr": lr},
                 random_state = 1502,
                 training_length = training_length,
                 input_chunk_length=input_chunk_length,
                 pl_trainer_kwargs = trainer_kwargs
                 )

In [27]:
# Train on the scaled target; the scaled and stacked covariates are supplied
# as future covariates.
model.fit(series = y_transformed,
          future_covariates = covariates_transformed)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 3.8 K 
4 | V             | Linear           | 21    
---------------------------------------------------
3.9 K     Trainable params
0         Non-trainable params
3.9 K     Total params
0.015     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


RNNModel(model=LSTM, hidden_dim=20, n_rnn_layers=1, dropout=0.1, training_length=20, n_epochs=10, optimizer_kwargs={'lr': 0.003}, random_state=1502, input_chunk_length=20, pl_trainer_kwargs={'accelerator': 'gpu', 'devices': [0]})

# Forecasting and Exporting

In [33]:
# Forecast one step per row of future_df (still in scaled units)
predictions_lstm = model.predict(future_covariates = covariates_transformed,
                                 n = len(future_df))
# Undo the target scaling, then convert to a pandas Series named "lstm"
predictions_lstm = (
    transformer1
    .inverse_transform(predictions_lstm)
    .pd_series()
    .rename("lstm")
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

In [34]:
# Exporting
# Write the "lstm" forecast Series to CSV in the Ensemble folder
# (the path is relative to the current working directory).
predictions_lstm.to_csv("Forecasting Product/Ensemble/predictions_lstm.csv")