# Model Training Scores
Each model will be tested against other stocks in the list

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from os import makedirs

## Data Preprocessing

In [2]:
# Loading of dataset (PSEI)
# We will only use the closing prices, hence we set usecols to index:4
data = np.genfromtxt('data/PSEI.csv', delimiter=',', skip_header=1, usecols=4)
print(f"Length of data: {len(data)}")
print(f"Closing Prices: {data}")

Length of data: 5675
Closing Prices: [2141.77 2153.18 2074.75 ... 6923.08 6842.79 6876.79]


In [3]:
# Splitting of dataset for training (80%)
# Split data into training and testing
len_train = int(len(data) * 0.80)
data_train = data[:len_train]

print(f"Length of data_train: {len(data_train)}\nData Train: {data_train}\n")

Length of data_train: 4540
Data Train: [2141.77 2153.18 2074.75 ... 7267.34 7348.42 7233.57]



In [4]:
# Creating a windowed datasets for training, using the following window sizes (5, 10, 15, 20)
windowed_sizes = [5, 10, 15, 20]
windowed_data_train = [0, 0, 0, 0]

idx_counter = 0
for window_size in windowed_sizes:
    train_data = np.zeros((len(data_train) - window_size, window_size))
    for i in range(len(data_train) - window_size):
        train_data[i] = data_train[i:i + window_size]
    windowed_data_train[idx_counter] = train_data
    idx_counter += 1

print(f"Length of windowed_data_train: {len(windowed_data_train)}\n")
# Print the windowed data for each window sizes
idx_counter = 0
for items in windowed_data_train:
    print(f"Window Size: {windowed_sizes[idx_counter]} \
          \nwindowed_data_train_{windowed_sizes[idx_counter]}: \
          {windowed_data_train[idx_counter]}\n\n")
    idx_counter += 1

Length of windowed_data_train: 4

Window Size: 5           
windowed_data_train_5:           [[2141.77 2153.18 2074.75 2079.11 2094.29]
 [2153.18 2074.75 2079.11 2094.29 2142.25]
 [2074.75 2079.11 2094.29 2142.25 2140.78]
 ...
 [7007.21 7176.43 7066.57 7193.68 7227.96]
 [7176.43 7066.57 7193.68 7227.96 7267.34]
 [7066.57 7193.68 7227.96 7267.34 7348.42]]


Window Size: 10           
windowed_data_train_10:           [[2141.77 2153.18 2074.75 ... 2101.02 2084.5  2102.1 ]
 [2153.18 2074.75 2079.11 ... 2084.5  2102.1  2097.23]
 [2074.75 2079.11 2094.29 ... 2102.1  2097.23 2074.84]
 ...
 [7312.61 7261.62 7098.15 ... 7066.57 7193.68 7227.96]
 [7261.62 7098.15 7063.2  ... 7193.68 7227.96 7267.34]
 [7098.15 7063.2  6986.88 ... 7227.96 7267.34 7348.42]]


Window Size: 15           
windowed_data_train_15:           [[2141.77 2153.18 2074.75 ... 2048.53 2053.37 2062.66]
 [2153.18 2074.75 2079.11 ... 2053.37 2062.66 2055.56]
 [2074.75 2079.11 2094.29 ... 2062.66 2055.56 2047.28]
 ...
 [7740.74 7

In [5]:
y_data_train = [[], [], [], []]

# For y_data_train, in window_size = 5
for i in range(len(windowed_data_train[0])):
    y_data_train[0].append(windowed_data_train[0][i][1])

# For y_data_train, in window_size = 10
for i in range(len(windowed_data_train[1])):
    y_data_train[1].append(windowed_data_train[1][i][1])

# For y_data_train, in window_size = 15
for i in range(len(windowed_data_train[2])):
    y_data_train[2].append(windowed_data_train[2][i][1])

# For y_data_train, in window_size = 20
for i in range(len(windowed_data_train[3])):
    y_data_train[3].append(windowed_data_train[3][i][1])

y_data_train

[[2153.18,
  2074.75,
  2079.11,
  2094.29,
  2142.25,
  2140.78,
  2101.02,
  2084.5,
  2102.1,
  2097.23,
  2074.84,
  2048.53,
  2053.37,
  2062.66,
  2055.56,
  2047.28,
  1998.59,
  1975.2,
  1969.39,
  1989.43,
  1973.44,
  1975.96,
  1998.13,
  2008.86,
  2005.58,
  2034.44,
  2047.27,
  2020.15,
  1997.21,
  1976.74,
  1938.06,
  1894.95,
  1869.71,
  1884.28,
  1833.84,
  1799.83,
  1828.93,
  1794.81,
  1720.65,
  1641.94,
  1653.95,
  1667.44,
  1696.75,
  1738.3,
  1686.72,
  1639.79,
  1626.95,
  1621.62,
  1602.96,
  1633.33,
  1638.6,
  1629.97,
  1646.28,
  1657.07,
  1651.4,
  1646.91,
  1680.58,
  1681.71,
  1697.25,
  1701.08,
  1691.14,
  1697.81,
  1681.72,
  1688.32,
  1682.53,
  1691.05,
  1713.24,
  1738.49,
  1796.66,
  1757.45,
  1744.57,
  1725.98,
  1712.54,
  1637.54,
  1648.13,
  1664.0,
  1664.46,
  1647.56,
  1639.83,
  1620.53,
  1598.73,
  1606.04,
  1592.68,
  1553.34,
  1551.9,
  1518.61,
  1523.43,
  1522.96,
  1505.21,
  1539.31,
  1517.09,
  1507.

In [6]:
# Dictionary for storing the scores of each model
# model : [MSE, RMSE, MAE, MAPE]
scores = {
    'baseline5': [0, 0, 0, 0],
    'baseline10': [0, 0, 0, 0],
    'baseline15': [0, 0, 0, 0],
    'baseline20': [0, 0, 0, 0],
    'dmd-lstm5': [0, 0, 0, 0],
    'dmd-lstm10': [0, 0, 0, 0],
    'dmd-lstm15': [0, 0, 0, 0],
    'dmd-lstm20': [0, 0, 0, 0],
}

## Predicting each stocks for every model

In [7]:
# Window Size = 5 (Baseline)
model = tf.keras.models.load_model('./exported_models/model_baseline5.keras')
y_pred = model.predict(windowed_data_train[0])
scores['baseline5'][0] = mean_squared_error(y_data_train[0], y_pred)
scores['baseline5'][1] = sqrt(mean_squared_error(y_data_train[0], y_pred))
scores['baseline5'][2] = mean_absolute_error(y_data_train[0], y_pred)
scores['baseline5'][3] = mean_absolute_percentage_error(y_data_train[0], y_pred)

# Window Size = 10 (Baseline)
model = tf.keras.models.load_model('./exported_models/model_baseline10.keras')
y_pred = model.predict(windowed_data_train[1])
scores['baseline10'][0] = mean_squared_error(y_data_train[1], y_pred)
scores['baseline10'][1] = sqrt(mean_squared_error(y_data_train[1], y_pred))
scores['baseline10'][2] = mean_absolute_error(y_data_train[1], y_pred)
scores['baseline10'][3] = mean_absolute_percentage_error(y_data_train[1], y_pred)

# Window Size = 15 (Baseline)
model = tf.keras.models.load_model('./exported_models/model_baseline15.keras')
y_pred = model.predict(windowed_data_train[2])
scores['baseline15'][0] = mean_squared_error(y_data_train[2], y_pred)
scores['baseline15'][1] = sqrt(mean_squared_error(y_data_train[2], y_pred))
scores['baseline15'][2] = mean_absolute_error(y_data_train[2], y_pred)
scores['baseline15'][3] = mean_absolute_percentage_error(y_data_train[2], y_pred)

# Window Size = 20 (Baseline)
model = tf.keras.models.load_model('./exported_models/model_baseline20.keras')
y_pred = model.predict(windowed_data_train[3])
scores['baseline20'][0] = mean_squared_error(y_data_train[3], y_pred)
scores['baseline20'][1] = sqrt(mean_squared_error(y_data_train[3], y_pred))
scores['baseline20'][2] = mean_absolute_error(y_data_train[3], y_pred)
scores['baseline20'][3] = mean_absolute_percentage_error(y_data_train[3], y_pred)

# Window Size = 5 (DMD-LSTM)
model = tf.keras.models.load_model('./exported_models/model_s5.keras')
y_pred = model.predict(windowed_data_train[0])
scores['dmd-lstm5'][0] = mean_squared_error(y_data_train[0], y_pred)
scores['dmd-lstm5'][1] = sqrt(mean_squared_error(y_data_train[0], y_pred))
scores['dmd-lstm5'][2] = mean_absolute_error(y_data_train[0], y_pred)
scores['dmd-lstm5'][3] = mean_absolute_percentage_error(y_data_train[0], y_pred)

# Window Size = 10 (DMD-LSTM)
model = tf.keras.models.load_model('./exported_models/model_s10.keras')
y_pred = model.predict(windowed_data_train[1])
scores['dmd-lstm10'][0] = mean_squared_error(y_data_train[1], y_pred)
scores['dmd-lstm10'][1] = sqrt(mean_squared_error(y_data_train[1], y_pred))
scores['dmd-lstm10'][2] = mean_absolute_error(y_data_train[1], y_pred)
scores['dmd-lstm10'][3] = mean_absolute_percentage_error(y_data_train[1], y_pred)

# Window Size = 15 (DMD-LSTM)
model = tf.keras.models.load_model('./exported_models/model_s15.keras')
y_pred = model.predict(windowed_data_train[2])
scores['dmd-lstm15'][0] = mean_squared_error(y_data_train[2], y_pred)
scores['dmd-lstm15'][1] = sqrt(mean_squared_error(y_data_train[2], y_pred))
scores['dmd-lstm15'][2] = mean_absolute_error(y_data_train[2], y_pred)
scores['dmd-lstm15'][3] = mean_absolute_percentage_error(y_data_train[2], y_pred)

# Window Size = 20 (DMD-LSTM)
model = tf.keras.models.load_model('./exported_models/model_s20.keras')
y_pred = model.predict(windowed_data_train[3])
scores['dmd-lstm20'][0] = mean_squared_error(y_data_train[3], y_pred)
scores['dmd-lstm20'][1] = sqrt(mean_squared_error(y_data_train[3], y_pred))
scores['dmd-lstm20'][2] = mean_absolute_error(y_data_train[3], y_pred)
scores['dmd-lstm20'][3] = mean_absolute_percentage_error(y_data_train[3], y_pred)



In [8]:
# Create DataFrame to show the scores
df = pd.DataFrame(scores, index=['MSE', 'RMSE', 'MAE', 'MAPE'])
df

Unnamed: 0,baseline5,baseline10,baseline15,baseline20,dmd-lstm5,dmd-lstm10,dmd-lstm15,dmd-lstm20
MSE,2912.840703,191.935882,1118.183283,706.136814,3.7e-05,0.787877,0.006917,0.057851
RMSE,53.970739,13.854093,33.439248,26.573235,0.006106,0.887624,0.083166,0.240522
MAE,35.301888,9.480864,22.09972,18.285352,0.004175,0.755407,0.067645,0.202746
MAPE,0.009618,0.002527,0.006024,0.005004,1e-06,0.000194,1.7e-05,5.3e-05
