In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
import matplotlib.pyplot as plt
tf.random.set_seed(123)

In [2]:
df = pd.read_csv(r'G:\Study and WorkRelated\Book Proposal\book\Code and Data\Data\FB_DL.csv')

In [3]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2013-01-28,31.879999,32.509998,31.809999,32.470001,32.470001,59682500
1,2013-01-29,32.0,32.07,30.709999,30.790001,30.790001,72976500
2,2013-01-30,30.98,31.49,30.879999,31.24,31.24,87682100
3,2013-01-31,29.15,31.469999,28.74,30.98,30.98,190744900
4,2013-02-01,31.01,31.02,29.629999,29.73,29.73,85856700


In [4]:
df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1761.0,1761.0,1761.0,1761.0,1761.0,1761.0
mean,118.78184,120.008399,117.500392,118.811113,118.811113,30958130.0
std,53.322957,53.748415,52.885288,53.354433,53.354433,25208600.0
min,22.99,23.09,22.67,22.9,22.9,5913100.0
25%,75.589996,76.57,74.779999,75.739998,75.739998,15849800.0
50%,118.629997,119.43,117.610001,118.669998,118.669998,22751200.0
75%,169.820007,171.5,168.25,169.919998,169.919998,36796000.0
max,222.570007,222.75,221.279999,222.139999,222.139999,365457900.0


In [6]:
def custom_ts_multi_data_prep(dataset, target, start_index, end_index, window, horizon):
    data = []
    labels = []

    start_index = start_index + window
    if end_index is None:
        end_index = len(dataset) - horizon

    for i in range(start_index, end_index):
        indices = range(i-window, i)
        data.append(dataset[indices])

        indicey = range(i+1, i+1+horizon)
        labels.append(target[indicey])
    return np.array(data), np.array(labels)

In [7]:
validate = df[['Open', 'High','Low','Close']].tail(70)
df.drop(df.tail(70).index,inplace=True)

In [8]:
x_scaler = preprocessing.MinMaxScaler()
y_scaler = preprocessing.MinMaxScaler()
dataX = x_scaler.fit_transform(df[['Open', 'High','Low']])
dataY = y_scaler.fit_transform(df[['Close']])

In [9]:
hist_window = 60
horizon = 10
TRAIN_SPLIT = 1000
x_train_multi, y_train_multi = custom_ts_multi_data_prep(
    dataX, dataY, 0, TRAIN_SPLIT, hist_window, horizon)
x_val_multi, y_val_multi= custom_ts_multi_data_prep(
    dataX, dataY, TRAIN_SPLIT, None, hist_window, horizon)

In [10]:
x_train_multi[0]

array([[0.0461267 , 0.04817674, 0.04770354],
       [0.04674934, 0.04592646, 0.04196242],
       [0.04145696, 0.04296016, 0.04284968],
       [0.03196181, 0.04285787, 0.03168058],
       [0.04161262, 0.04055644, 0.03632567],
       [0.03149483, 0.03124841, 0.02787056],
       [0.02734395, 0.03002096, 0.02802714],
       [0.02983448, 0.0317087 , 0.03126305],
       [0.03175427, 0.03099269, 0.02922756],
       [0.03061277, 0.03109497, 0.03048017],
       [0.02915997, 0.02858896, 0.02802714],
       [0.02428268, 0.02592953, 0.02312109],
       [0.02267421, 0.02674781, 0.02421711],
       [0.02609869, 0.02833324, 0.02787056],
       [0.02869299, 0.02894697, 0.0282881 ],
       [0.02718829, 0.03063469, 0.02844468],
       [0.03076843, 0.03048125, 0.02954071],
       [0.02744773, 0.0279241 , 0.02338205],
       [0.02402325, 0.02321894, 0.02165971],
       [0.02163649, 0.02327008, 0.02338205],
       [0.02267421, 0.02234951, 0.02103341],
       [0.02257044, 0.0217358 , 0.02066805],
       [0.

In [11]:
y_train_multi[0]

array([[0.01664953],
       [0.02029805],
       [0.02096608],
       [0.02502569],
       [0.02327852],
       [0.03119218],
       [0.02780061],
       [0.02399794],
       [0.02050359],
       [0.02168551]])

In [12]:
BATCH_SIZE = 128
BUFFER_SIZE = 150

train_data_multi = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
train_data_multi = train_data_multi.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data_multi = val_data_multi.batch(BATCH_SIZE).repeat()

In [13]:
simple_GRU_model = tf.keras.models.Sequential([
    tf.keras.layers.GRU(100, input_shape=x_train_multi.shape[-2:],return_sequences=True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.GRU(units=50,return_sequences=False),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=horizon),
])
simple_GRU_model.compile(optimizer='adam', loss='mse')


In [14]:
model_path = r'G:\Study and WorkRelated\Book Proposal\book\Code and Data\Section 3\Chapter 6\GRU_Multivariate.h5'

In [15]:
EVALUATION_INTERVAL = 100
EPOCHS = 150
history = simple_GRU_model.fit(train_data_multi, epochs=EPOCHS,steps_per_epoch=EVALUATION_INTERVAL,validation_data=val_data_multi, validation_steps=50,verbose =1,
                              callbacks =[tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='min'),tf.keras.callbacks.ModelCheckpoint(model_path,monitor='val_loss', save_best_only=True, mode='min', verbose=0)])

Train for 100 steps, validate for 50 steps
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 00039: early stopping


In [16]:
Trained_model = tf.keras.models.load_model(r'G:\Study and WorkRelated\Book Proposal\book\Code and Data\Section 3\Chapter 6\GRU_Multivariate.h5')

In [17]:
# Show the model architecture
Trained_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, 60, 100)           31500     
_________________________________________________________________
dropout (Dropout)            (None, 60, 100)           0         
_________________________________________________________________
gru_1 (GRU)                  (None, 50)                22800     
_________________________________________________________________
dropout_1 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense (Dense)                (None, 10)                510       
Total params: 54,810
Trainable params: 54,810
Non-trainable params: 0
_________________________________________________________________


In [24]:
validatehist = validate[['Open', 'High','Low']].head(60)
validatehori = validate[['Open', 'High','Low','Close']].tail(10)

In [25]:
validatehist

Unnamed: 0,Open,High,Low
1691,183.800003,190.380005,183.660004
1692,188.320007,189.699997,186.899994
1693,190.300003,190.839996,188.770004
1694,190.229996,191.490005,183.919998
1695,187.039993,189.910004,186.75
1696,190.0,190.649994,181.5
1697,182.009995,186.380005,182.0
1698,184.619995,186.729996,182.800003
1699,185.830002,189.0,185.089996
1700,187.199997,189.529999,185.080002


In [26]:
validatehist_x = x_scaler.fit_transform(validatehist[['Open', 'High','Low']])
#dataY = y_scaler.fit_transform(df[['Close']])

In [27]:
val_rescaled = validatehist_x.reshape(1, validatehist_x.shape[0], validatehist_x.shape[1])

In [28]:
Predicted_results = Trained_model.predict(val_rescaled)

In [29]:
Predicted_results

array([[0.85437167, 0.825097  , 0.85728365, 0.8246887 , 0.83759046,
        0.83964086, 0.84814143, 0.8378514 , 0.8242008 , 0.8411345 ]],
      dtype=float32)

In [30]:
Predicted_results_Inv_trans = y_scaler.inverse_transform(Predicted_results)

In [31]:
Predicted_results_Inv_trans

array([[189.16072, 183.46388, 189.7274 , 183.38441, 185.8951 , 186.29411,
        187.94832, 185.94588, 183.28947, 186.58476]], dtype=float32)

In [32]:
validatehori

Unnamed: 0,Open,High,Low,Close
1751,219.199997,219.880005,217.419998,218.059998
1752,219.600006,221.970001,219.210007,221.910004
1753,221.610001,222.380005,218.630005,219.059998
1754,220.610001,221.679993,220.139999,221.149994
1755,222.570007,222.630005,220.389999,221.770004
1756,222.029999,222.289993,220.529999,222.139999
1757,222.160004,222.369995,219.119995,221.440002
1758,222.309998,222.75,221.279999,221.320007
1759,220.75,221.490005,219.270004,219.759995
1760,220.800003,221.779999,216.110001,217.940002


In [33]:
from sklearn import metrics
def timeseries_evaluation_metrics_func(y_true, y_pred):
    
    def mean_absolute_percentage_error(y_true, y_pred): 
        y_true, y_pred = np.array(y_true), np.array(y_pred)
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    print('Evaluation metric results:-')
    print(f'MSE is : {metrics.mean_squared_error(y_true, y_pred)}')
    print(f'MAE is : {metrics.mean_absolute_error(y_true, y_pred)}')
    print(f'RMSE is : {np.sqrt(metrics.mean_squared_error(y_true, y_pred))}')
    print(f'MAPE is : {mean_absolute_percentage_error(y_true, y_pred)}')
    #print(f'R2 is : {metrics.r2_score(y_true, y_pred)}',end='\n\n')

In [34]:
timeseries_evaluation_metrics_func(validatehori['Close'],Predicted_results_Inv_trans[0])

Evaluation metric results:-
MSE is : 1185.8121076487512
MAE is : 34.28559417207031
RMSE is : 34.43562265516265
MAPE is : 15.544881990813813
