# 1. Import libraries and data

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten, GRU, Dropout,RNN
from neuralprophet import NeuralProphet
import warnings
warnings.filterwarnings("ignore")



In [2]:
dom = pd.read_excel('dom.xlsx')
dom = dom.rename(columns={'DATE': 'ds', 'Pax': 'y'})

In [3]:
dom.head(16)

Unnamed: 0,ds,y,seat
0,2015-01-01,52356.0,59972
1,2015-01-02,47811.0,56277
2,2015-01-03,49555.0,60035
3,2015-01-04,57680.0,62548
4,2015-01-05,50193.0,58566
5,2015-01-06,45867.0,54165
6,2015-01-07,42315.0,54109
7,2015-01-08,46726.0,56959
8,2015-01-09,46950.0,55739
9,2015-01-10,45987.0,56495


In [4]:
holidays = pd.read_excel('holidays2.xlsx')
holidays = holidays.rename(columns={'holiday': 'event'})

In [5]:
m = NeuralProphet()
m.add_events(events = ['covid_1','covid_2','covid_3','covid_4','No_int','Lunar','Labor','Summer','new_year'])

<neuralprophet.forecaster.NeuralProphet at 0x1c558fa92e0>

In [6]:
df = m.create_df_with_events(dom,holidays)

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column


In [7]:
df

Unnamed: 0,ds,y,seat,covid_1,covid_2,covid_3,covid_4,No_int,Lunar,Labor,Summer,new_year
0,2015-01-01,52356.0,59972,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2015-01-02,47811.0,56277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2015-01-03,49555.0,60035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2015-01-04,57680.0,62548,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2015-01-05,50193.0,58566,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
3007,2023-03-27,102822.0,138545,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3008,2023-03-28,92698.0,134534,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3009,2023-03-29,96072.0,130512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3010,2023-03-30,100110.0,128780,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# 2. Data preparation & windowing

In [8]:
## Shift seat data for 7 days:
df['seat'] = df['seat'].shift(7)

In [9]:
df

Unnamed: 0,ds,y,seat,covid_1,covid_2,covid_3,covid_4,No_int,Lunar,Labor,Summer,new_year
0,2015-01-01,52356.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2015-01-02,47811.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2015-01-03,49555.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2015-01-04,57680.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2015-01-05,50193.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
3007,2023-03-27,102822.0,127252.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3008,2023-03-28,92698.0,118724.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3009,2023-03-29,96072.0,122685.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3010,2023-03-30,100110.0,122533.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
df.dropna(inplace = True)

In [11]:
# It is important to scale features before training a neural network.
# Normalization is a common way of doing this scaling: subtract the mean and divide by the standard deviation of each feature.
train_mean = df[['y','seat']].mean()
train_std = df[['y','seat']].std()

In [12]:
#Split data:
train_df = df[df['ds'] < '2022-01-01'].copy()
val_df = df[(df['ds'] >= '2022-01-01')&(df['ds'] < '2023-01-01')].copy()
test_df = df[df['ds'] >= '2023-01-01'].copy()

In [13]:
train_df[['y','seat']] = (train_df[['y','seat']] - train_mean) / train_std
val_df[['y','seat']] = (val_df[['y','seat']] - train_mean) / train_std
test_df[['y','seat']] = (test_df[['y','seat']] - train_mean) / train_std

In [14]:
train = train_df.iloc[:,1:].to_numpy()
val = val_df.iloc[:,1:].to_numpy()
test = test_df.iloc[:,1:].to_numpy()

In [15]:
len(train)

2550

In [16]:
# define a function that reshape input data to (number of samples, sequence, features) shape:
# number of samples: how many sequences/samples we can generate based on dataset
# sequence: how long is the sequence, i.e. number of time steps per sample
# features: number of parallel time series or the number of variables

def split_sequences(sequences, n_steps):
    X = list()
    y = list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix,1:],sequences[end_ix-1,0]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [17]:
# choose a number of time steps
n_steps = 365
# reshape input data:
X_train, y_train = split_sequences(train, n_steps)
X_val, y_val = split_sequences(val, n_steps)
X_test, y_test = split_sequences(test, n_steps)

In [23]:
print(X_train[0], y_train[0])

[[-1.05133818  0.          0.         ...  0.          0.
   0.        ]
 [-1.14424412  0.          0.         ...  0.          0.
   0.        ]
 [-1.04975412  0.          0.         ...  0.          0.
   0.        ]
 ...
 [-0.56785009  0.          0.         ...  0.          0.
   0.        ]
 [-0.43692688  0.          0.         ...  0.          0.
   0.        ]
 [-0.45226454  0.          0.         ...  0.          0.
   0.        ]] -0.7865411441233181


In [24]:
X_train[0].shape

(365, 10)

# 3. Build models

## 3.1. LSTM

In [46]:
# define model
model1 = Sequential()
model1.add(LSTM(64, activation='relu', return_sequences=True, input_shape=(n_steps,10)))
model1.add(LSTM(32, return_sequences=False))
model1.add(Dense(1, activation='relu'))

In [47]:
model1.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_14 (LSTM)              (None, 365, 64)           19200     
                                                                 
 lstm_15 (LSTM)              (None, 32)                12416     
                                                                 
 dense_8 (Dense)             (None, 1)                 33        
                                                                 
Total params: 31,649
Trainable params: 31,649
Non-trainable params: 0
_________________________________________________________________


In [None]:
# fit model
model1.compile(optimizer='adam', loss='mse', metrics=['mae'])
history = model1.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
11/69 [===>..........................] - ETA: 9s - loss: 0.8648 - mae: 0.6370

In [None]:
yhat = model1.predict(X_test)