# Libraries and Data

In [1]:
# Change the working directory to the project folder so that the relative
# paths used by later cells (e.g. 'nyc_data.csv') resolve against it.
# NOTE(review): assumes Google Drive is already mounted at /content/drive;
# no mount step is visible in this notebook — confirm before a fresh run.
%cd /content/drive/MyDrive/Time Series Forecasting Product

/content/drive/MyDrive/Time Series Forecasting Product


In [None]:
pip install --upgrade darts

In [None]:
# Install libraries
!pip install pyyaml==5.4.1
!pip install darts

In [4]:
#libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from darts.timeseries import TimeSeries
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.dataprocessing.transformers import Scaler
from darts.models import RNNModel

In [5]:
# Load the historical data and the future regressors.
# The first CSV column (dates written as YYYY-MM-DD) is used as the index
# and parsed as dates.
df = pd.read_csv("nyc_data.csv", parse_dates=True, index_col=0)
future_df = pd.read_csv("future.csv", parse_dates=True, index_col=0)

# Display only the column headers of the historical frame.
df.head(0)


Unnamed: 0_level_0,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [6]:
# Regressors: every column except the first one, for both the historical
# frame and the future frame.
X_train, X_future = df.iloc[:, 1:], future_df.iloc[:, 1:]

In [7]:
# Append the future regressors below the historical ones so a single frame
# covers the whole horizon (history + forecast period).
X = pd.concat([X_train, X_future], axis=0)
X

Unnamed: 0_level_0,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-01,0,0,0,3.68,41.305
2015-01-02,0,0,0,4.73,131.574
2015-01-03,0,0,0,7.23,162.700
2015-01-04,0,0,0,10.96,160.281
2015-01-05,0,0,0,6.92,51.077
...,...,...,...,...,...
2021-01-27,0,0,0,3.33,39.664
2021-01-28,0,0,0,1.67,195.314
2021-01-29,0,0,0,-2.78,235.894
2021-01-30,0,0,0,1.11,152.752


In [8]:
# Rename the target column from 'Demand' to 'y', then show the headers.
df = df.rename({"Demand": "y"}, axis="columns")
df.head(0)

Unnamed: 0_level_0,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


# Prepare for LSTM

In [9]:
# Wrap the pandas objects in darts TimeSeries:
#   series     -> the target column 'y'
#   covariates -> the merged regressors (history + future)
series = TimeSeries.from_series(df["y"])
covariates = TimeSeries.from_dataframe(X)

In [10]:
# Calendar covariates built on a date range that starts where the target
# series starts and has as many periods as X has rows.

# year: kept as a single numeric column (one_hot=False) because years have a
# natural order.
year_series = datetime_attribute_timeseries(
    pd.date_range(
        start=series.start_time(),
        periods=X.shape[0],
        freq=series.freq_str,
    ),
    attribute="year",
    one_hot=False,
)

# month: one-hot encoded, because months are categories without a meaningful
# magnitude (August is not "bigger" than July).
month_series = datetime_attribute_timeseries(
    year_series, attribute="month", one_hot=True
)

# weekday: one-hot encoded for the same reason as month.
weekday_series = datetime_attribute_timeseries(
    year_series, attribute="weekday", one_hot=True
)

# One-hot encoding represents categorical data as binary vectors:
#   1 -> hot  (the category that applies)
#   0 -> cold (every other category)


In [11]:
# Preparing scalers for the series and the covariates.
# Two independent Scaler instances are used so each is fitted on its own
# data; `transformer1` is also the one used later to invert the predictions.
# (The former `from pandas.core.tools.datetimes import Scalar` line was
# dropped: `Scalar` is never used in this notebook and lives in a private
# pandas module.)
transformer1 = Scaler()  # target series (y)
transformer2 = Scaler()  # covariates

In [12]:
# Scale the target time series (y): fit `transformer1` on it and transform it
# in one step. The same fitted scaler is reused further down to convert the
# model's predictions back to the original scale.
y_transformed = transformer1.fit_transform(series)
# Display the scaled series.
y_transformed

In [13]:
# Normalize/scale the covariates.
# The year column is stacked BEFORE scaling so it is scaled together with the
# regressors; the one-hot month and weekday columns are stacked AFTER scaling
# and therefore are not passed through the scaler.
# The year-augmented series gets its own name instead of overwriting
# `covariates`, so re-running this cell does not stack 'year' a second time.
covariates_with_year = covariates.stack(year_series)
covariates_transformed = (
    transformer2.fit_transform(covariates_with_year)
    .stack(month_series)
    .stack(weekday_series)
)

# LSTM

In [14]:
# Read the best LSTM hyper-parameters; the first CSV column (the parameter
# names) becomes the index.
parameters = pd.read_csv("Forecasting Product/best_params_lstm.csv", index_col=0)
parameters

Unnamed: 0,14
dropout,0.2
hidden_dim,30.0
input_chunk_length,20.0
lr,0.003
n_epochs,20.0
n_rnn_layers,4.0
training_length,20.0
rmse,83.370327


In [15]:
# Getting the parameters.
# Take the first value column positionally with .iloc: an integer key passed
# to Series.__getitem__ on a non-integer index (`...loc[name][0]`) relies on a
# positional fallback that pandas has deprecated.
best_params = parameters.iloc[:, 0]

# Values are cast explicitly to the type each model argument needs.
n_rnn_layers = int(best_params["n_rnn_layers"])
dropout = float(best_params["dropout"])
hidden_dim = int(best_params["hidden_dim"])
input_chunk_length = int(best_params["input_chunk_length"])
lr = float(best_params["lr"])
n_epochs = int(best_params["n_epochs"])
training_length = int(best_params["training_length"])

In [16]:
# Build the LSTM model from the tuned hyper-parameters.
# `n_epochs` now comes from the loaded parameters instead of a hard-coded 20,
# so a change in the parameters file is no longer silently ignored.
model = RNNModel(
    model="LSTM",
    hidden_dim=hidden_dim,
    n_rnn_layers=n_rnn_layers,
    dropout=dropout,
    n_epochs=n_epochs,
    optimizer_kwargs={"lr": lr},
    random_state=1502,  # fixed seed for reproducible training
    training_length=training_length,
    input_chunk_length=input_chunk_length,
    pl_trainer_kwargs={"accelerator": "auto"},
    save_checkpoints=False,
)


In [None]:
!pip install --upgrade pytorch-lightning

In [18]:
# Fit the model on the scaled target series, passing the scaled covariates
# (regressors + year + one-hot month/weekday) as future covariates.
# The call is the last expression of the cell, so its return value is displayed.
model.fit(y_transformed,
          future_covariates = covariates_transformed)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 29.3 K
4 | V             | Linear           | 31    
---------------------------------------------------
29.3 K    Trainable params
0         Non-trainable params
29.3 K    Total params
0.117     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


RNNModel(model=LSTM, hidden_dim=30, n_rnn_layers=4, dropout=0.2, training_length=20, n_epochs=20, optimizer_kwargs={'lr': 0.003}, random_state=1502, input_chunk_length=20, pl_trainer_kwargs={'accelerator': 'auto'}, save_checkpoints=False)

# Predictions and Exporting

In [19]:
# Forecast one step per row of `future_df`, undo the target scaling with the
# scaler fitted on y, and convert the result to a pandas Series named "lstm".
predictions_lstm = (
    transformer1
    .inverse_transform(
        model.predict(
            n=len(future_df),
            future_covariates=covariates_transformed,
        )
    )
    .pd_series()
    .rename("lstm")
)
predictions_lstm

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

Date
2021-01-01    731.538966
2021-01-02    862.598676
2021-01-03    761.259599
2021-01-04    910.333804
2021-01-05    770.298379
2021-01-06    938.918505
2021-01-07    803.011561
2021-01-08    724.529980
2021-01-09    859.422634
2021-01-10    760.325915
2021-01-11    912.169185
2021-01-12    768.966821
2021-01-13    929.445215
2021-01-14    807.096273
2021-01-15    723.548841
2021-01-16    856.733711
2021-01-17    755.554102
2021-01-18    897.951174
2021-01-19    760.709397
2021-01-20    927.057847
2021-01-21    800.919301
2021-01-22    717.433983
2021-01-23    853.727742
2021-01-24    756.829767
2021-01-25    904.143178
2021-01-26    765.001339
2021-01-27    920.431649
2021-01-28    799.604310
2021-01-29    719.657386
2021-01-30    852.886702
2021-01-31    755.079150
Freq: D, Name: lstm, dtype: float64

In [20]:
# Export the LSTM forecast (pandas Series named "lstm") to CSV.
# The path is relative to the working directory set at the top of the notebook.
predictions_lstm.to_csv("Forecasting Product/Ensemble/predictions_lstm.csv")