# Model Training Scores
Each model will be tested against other stocks in the list

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from os import makedirs

## Data Preprocessing

In [2]:
# Loading of dataset (PSEI)
# We will only use the closing prices, hence we set usecols to index:4
data = np.genfromtxt('data/PSEI.csv', delimiter=',', skip_header=1, usecols=4)
data = np.log(data)
print(f"Length of data: {len(data)}")
print(f"Closing Prices: {data}")

Length of data: 5675
Closing Prices: [7.66938787 7.6747011  7.63759594 ... 8.84261604 8.83095082 8.83590725]


In [3]:
# Splitting of dataset for training (80%)
# Split data into training and testing
len_train = int(len(data) * 0.80)
data_train = data[:len_train]

print(f"Length of data_train: {len(data_train)}\nData Train: {data_train}\n")

Length of data_train: 4540
Data Train: [7.66938787 7.6747011  7.63759594 ... 8.89114562 8.9022406  8.88648797]



In [4]:
# Creating a windowed datasets for training, using the following window sizes (5, 10, 15, 20)
windowed_sizes = [5, 10, 15, 20]
windowed_data_train = [0, 0, 0, 0]

idx_counter = 0
for window_size in windowed_sizes:
    train_data = np.zeros((len(data_train) - window_size, window_size))
    for i in range(len(data_train) - window_size):
        train_data[i] = data_train[i:i + window_size]
    windowed_data_train[idx_counter] = train_data
    idx_counter += 1

print(f"Length of windowed_data_train: {len(windowed_data_train)}\n")
# Print the windowed data for each window sizes
idx_counter = 0
for items in windowed_data_train:
    print(f"Window Size: {windowed_sizes[idx_counter]} \
          \nwindowed_data_train_{windowed_sizes[idx_counter]}: \
          {windowed_data_train[idx_counter]}\n\n")
    idx_counter += 1

Length of windowed_data_train: 4

Window Size: 5           
windowed_data_train_5:           [[7.66938787 7.6747011  7.63759594 7.6396952  7.64696987]
 [7.6747011  7.63759594 7.6396952  7.64696987 7.66961196]
 [7.63759594 7.6396952  7.64696987 7.66961196 7.66892553]
 ...
 [8.8546949  8.87855732 8.86313049 8.88095814 8.88571212]
 [8.87855732 8.86313049 8.88095814 8.88571212 8.89114562]
 [8.86313049 8.88095814 8.88571212 8.89114562 8.9022406 ]]


Window Size: 10           
windowed_data_train_10:           [[7.66938787 7.6747011  7.63759594 ... 7.65017822 7.6422843  7.65069212]
 [7.6747011  7.63759594 7.6396952  ... 7.6422843  7.65069212 7.64837271]
 [7.63759594 7.6396952  7.64696987 ... 7.65069212 7.64837271 7.63763932]
 ...
 [8.89735553 8.89035822 8.86758947 ... 8.86313049 8.88095814 8.88571212]
 [8.89035822 8.86758947 8.86265349 ... 8.88095814 8.88571212 8.89114562]
 [8.86758947 8.86265349 8.85178938 ... 8.88571212 8.89114562 8.9022406 ]]


Window Size: 15           
windowed_data_tra

In [5]:
y_data_train = [[], [], [], []]

# For y_data_train, in window_size = 5
for i in range(len(windowed_data_train[0])):
    y_data_train[0].append(windowed_data_train[0][i][1])

# For y_data_train, in window_size = 10
for i in range(len(windowed_data_train[1])):
    y_data_train[1].append(windowed_data_train[1][i][1])

# For y_data_train, in window_size = 15
for i in range(len(windowed_data_train[2])):
    y_data_train[2].append(windowed_data_train[2][i][1])

# For y_data_train, in window_size = 20
for i in range(len(windowed_data_train[3])):
    y_data_train[3].append(windowed_data_train[3][i][1])

y_data_train

[[7.674701098142828,
  7.637595943478558,
  7.639695196511235,
  7.646969872767956,
  7.669611957549228,
  7.668925527588327,
  7.650178220076227,
  7.642284297320852,
  7.650692124044598,
  7.648372705383401,
  7.637639321258056,
  7.624877741741785,
  7.627237624842876,
  7.631751690957568,
  7.628303595890022,
  7.624267361846815,
  7.60019721091272,
  7.58842493803097,
  7.585479129112174,
  7.595603444528208,
  7.587533491803128,
  7.588809635187943,
  7.599967022156925,
  7.605322675975573,
  7.603688574716181,
  7.6179758757313705,
  7.624262477305168,
  7.610927045064047,
  7.599506485623733,
  7.58920430212959,
  7.56944275172379,
  7.546947731944156,
  7.533538617608549,
  7.541301064050994,
  7.514167408007421,
  7.495447494979654,
  7.511486375378529,
  7.492654445737744,
  7.450457405395387,
  7.4036337485408446,
  7.410921645380639,
  7.4190447951334155,
  7.436469935583868,
  7.460662903127495,
  7.430541093666842,
  7.402323463838811,
  7.39446237533932,
  7.39118092855

In [6]:
# Dictionary for storing the scores of each model
# model : [MSE, RMSE, MAE, MAPE]
scores = {
    'baseline5': [0, 0, 0, 0],
    'baseline10': [0, 0, 0, 0],
    'baseline15': [0, 0, 0, 0],
    'baseline20': [0, 0, 0, 0],
    'dmd-lstm5': [0, 0, 0, 0],
    'dmd-lstm10': [0, 0, 0, 0],
    'dmd-lstm15': [0, 0, 0, 0],
    'dmd-lstm20': [0, 0, 0, 0],
}

## Predicting each stocks for every model

In [7]:
# Window Size = 5 (Baseline)
model = tf.keras.models.load_model('./exported_models/model_baseline5.keras')
y_pred = model.predict(windowed_data_train[0])
scores['baseline5'][0] = mean_squared_error(y_data_train[0], y_pred)
scores['baseline5'][1] = sqrt(mean_squared_error(y_data_train[0], y_pred))
scores['baseline5'][2] = mean_absolute_error(y_data_train[0], y_pred)
scores['baseline5'][3] = mean_absolute_percentage_error(y_data_train[0], y_pred)

# Window Size = 10 (Baseline)
model = tf.keras.models.load_model('./exported_models/model_baseline10.keras')
y_pred = model.predict(windowed_data_train[1])
scores['baseline10'][0] = mean_squared_error(y_data_train[1], y_pred)
scores['baseline10'][1] = sqrt(mean_squared_error(y_data_train[1], y_pred))
scores['baseline10'][2] = mean_absolute_error(y_data_train[1], y_pred)
scores['baseline10'][3] = mean_absolute_percentage_error(y_data_train[1], y_pred)

# Window Size = 15 (Baseline)
model = tf.keras.models.load_model('./exported_models/model_baseline15.keras')
y_pred = model.predict(windowed_data_train[2])
scores['baseline15'][0] = mean_squared_error(y_data_train[2], y_pred)
scores['baseline15'][1] = sqrt(mean_squared_error(y_data_train[2], y_pred))
scores['baseline15'][2] = mean_absolute_error(y_data_train[2], y_pred)
scores['baseline15'][3] = mean_absolute_percentage_error(y_data_train[2], y_pred)

# Window Size = 20 (Baseline)
model = tf.keras.models.load_model('./exported_models/model_baseline20.keras')
y_pred = model.predict(windowed_data_train[3])
scores['baseline20'][0] = mean_squared_error(y_data_train[3], y_pred)
scores['baseline20'][1] = sqrt(mean_squared_error(y_data_train[3], y_pred))
scores['baseline20'][2] = mean_absolute_error(y_data_train[3], y_pred)
scores['baseline20'][3] = mean_absolute_percentage_error(y_data_train[3], y_pred)

# Window Size = 5 (DMD-LSTM)
model = tf.keras.models.load_model('./exported_models/model_s5.keras')
y_pred = model.predict(windowed_data_train[0])
scores['dmd-lstm5'][0] = mean_squared_error(y_data_train[0], y_pred)
scores['dmd-lstm5'][1] = sqrt(mean_squared_error(y_data_train[0], y_pred))
scores['dmd-lstm5'][2] = mean_absolute_error(y_data_train[0], y_pred)
scores['dmd-lstm5'][3] = mean_absolute_percentage_error(y_data_train[0], y_pred)

# Window Size = 10 (DMD-LSTM)
model = tf.keras.models.load_model('./exported_models/model_s10.keras')
y_pred = model.predict(windowed_data_train[1])
scores['dmd-lstm10'][0] = mean_squared_error(y_data_train[1], y_pred)
scores['dmd-lstm10'][1] = sqrt(mean_squared_error(y_data_train[1], y_pred))
scores['dmd-lstm10'][2] = mean_absolute_error(y_data_train[1], y_pred)
scores['dmd-lstm10'][3] = mean_absolute_percentage_error(y_data_train[1], y_pred)

# Window Size = 15 (DMD-LSTM)
model = tf.keras.models.load_model('./exported_models/model_s15.keras')
y_pred = model.predict(windowed_data_train[2])
scores['dmd-lstm15'][0] = mean_squared_error(y_data_train[2], y_pred)
scores['dmd-lstm15'][1] = sqrt(mean_squared_error(y_data_train[2], y_pred))
scores['dmd-lstm15'][2] = mean_absolute_error(y_data_train[2], y_pred)
scores['dmd-lstm15'][3] = mean_absolute_percentage_error(y_data_train[2], y_pred)

# Window Size = 20 (DMD-LSTM)
model = tf.keras.models.load_model('./exported_models/model_s20.keras')
y_pred = model.predict(windowed_data_train[3])
scores['dmd-lstm20'][0] = mean_squared_error(y_data_train[3], y_pred)
scores['dmd-lstm20'][1] = sqrt(mean_squared_error(y_data_train[3], y_pred))
scores['dmd-lstm20'][2] = mean_absolute_error(y_data_train[3], y_pred)
scores['dmd-lstm20'][3] = mean_absolute_percentage_error(y_data_train[3], y_pred)



In [8]:
# Create DataFrame to show the scores
df = pd.DataFrame(scores, index=['MSE', 'RMSE', 'MAE', 'MAPE'])
df

Unnamed: 0,baseline5,baseline10,baseline15,baseline20,dmd-lstm5,dmd-lstm10,dmd-lstm15,dmd-lstm20
MSE,0.075415,0.000326,0.016161,0.002316,7e-06,2.7e-05,8.679435e-07,2.6e-05
RMSE,0.274618,0.018044,0.127126,0.048122,0.002709,0.005242,0.0009316349,0.005106
MAE,0.274279,0.017773,0.126845,0.047692,0.002709,0.00524,0.0009315393,0.005106
MAPE,0.034281,0.00221,0.015863,0.005981,0.000339,0.000654,0.0001163191,0.000638
