# Dynamic Mode Decomposition and Long Short-Term Memory (Training)

In [1]:
import numpy as np
from pydmd import DMD
import tensorflow as tf

## Data Preprocessing
1. Loading of dataset
2. Splitting of dataset for (training [80%] and testing [20%])
3. Windowed Dataset (window_sizes = [5, 10, 15, and 20])
4. Acquiring the dynamic modes using pyDMD (This will be used as a weight layer for the LSTM model)

In [2]:
# Loading of dataset (PSEI)
# We will only use the closing prices, hence we set usecols to index:4
data = np.genfromtxt('data/PSEI.csv', delimiter=',', skip_header=1, usecols=4)
print(f"Length of data: {len(data)}")
print(f"Closing Prices: {data}")

Length of data: 5675
Closing Prices: [2141.77 2153.18 2074.75 ... 6923.08 6842.79 6876.79]


In [3]:
# Splitting of dataset for training (80%) and testing (20%)
# Split data into training and testing
len_train = int(len(data) * 0.80)
data_train = data[:len_train]
data_test = data[len_train:]

print(f"Length of data_train: {len(data_train)}\nData Train: {data_train}\n")
print(f"Length of data_test: {len(data_test)}\nData Test: {data_test}")

Length of data_train: 4540
Data Train: [2141.77 2153.18 2074.75 ... 7267.34 7348.42 7233.57]

Length of data_test: 1135
Data Test: [7186.71 7186.62 7233.29 ... 6923.08 6842.79 6876.79]


In [4]:
# Creating a windowed datasets for training, using the following window sizes (5, 10, 15, 20)
windowed_sizes = [5, 10, 15, 20]
windowed_data_train = [0, 0, 0, 0]

idx_counter = 0
for window_size in windowed_sizes:
    train_data = np.zeros((len(data_train) - window_size, window_size))
    for i in range(len(data_train) - window_size):
        train_data[i] = data_train[i:i + window_size]
    windowed_data_train[idx_counter] = train_data
    idx_counter += 1

print(f"Length of windowed_data_train: {len(windowed_data_train)}\n")
# Print the windowed data for each window sizes
idx_counter = 0
for items in windowed_data_train:
    print(f"Window Size: {windowed_sizes[idx_counter]} \
          \nwindowed_data_train_{windowed_sizes[idx_counter]}: \
          {windowed_data_train[idx_counter]}\n\n")
    idx_counter += 1

Length of windowed_data_train: 4

Window Size: 5           
windowed_data_train_5:           [[2141.77 2153.18 2074.75 2079.11 2094.29]
 [2153.18 2074.75 2079.11 2094.29 2142.25]
 [2074.75 2079.11 2094.29 2142.25 2140.78]
 ...
 [7007.21 7176.43 7066.57 7193.68 7227.96]
 [7176.43 7066.57 7193.68 7227.96 7267.34]
 [7066.57 7193.68 7227.96 7267.34 7348.42]]


Window Size: 10           
windowed_data_train_10:           [[2141.77 2153.18 2074.75 ... 2101.02 2084.5  2102.1 ]
 [2153.18 2074.75 2079.11 ... 2084.5  2102.1  2097.23]
 [2074.75 2079.11 2094.29 ... 2102.1  2097.23 2074.84]
 ...
 [7312.61 7261.62 7098.15 ... 7066.57 7193.68 7227.96]
 [7261.62 7098.15 7063.2  ... 7193.68 7227.96 7267.34]
 [7098.15 7063.2  6986.88 ... 7227.96 7267.34 7348.42]]


Window Size: 15           
windowed_data_train_15:           [[2141.77 2153.18 2074.75 ... 2048.53 2053.37 2062.66]
 [2153.18 2074.75 2079.11 ... 2053.37 2062.66 2055.56]
 [2074.75 2079.11 2094.29 ... 2062.66 2055.56 2047.28]
 ...
 [7740.74 7

In [5]:
y_data_train = [[], [], [], []]

# For y_data_train, in window_size = 5
for i in range(len(windowed_data_train[0])):
    y_data_train[0].append(windowed_data_train[0][i][1])

# For y_data_train, in window_size = 10
for i in range(len(windowed_data_train[1])):
    y_data_train[1].append(windowed_data_train[1][i][1])

# For y_data_train, in window_size = 15
for i in range(len(windowed_data_train[2])):
    y_data_train[2].append(windowed_data_train[2][i][1])

# For y_data_train, in window_size = 20
for i in range(len(windowed_data_train[3])):
    y_data_train[3].append(windowed_data_train[3][i][1])

y_data_train

[[2153.18,
  2074.75,
  2079.11,
  2094.29,
  2142.25,
  2140.78,
  2101.02,
  2084.5,
  2102.1,
  2097.23,
  2074.84,
  2048.53,
  2053.37,
  2062.66,
  2055.56,
  2047.28,
  1998.59,
  1975.2,
  1969.39,
  1989.43,
  1973.44,
  1975.96,
  1998.13,
  2008.86,
  2005.58,
  2034.44,
  2047.27,
  2020.15,
  1997.21,
  1976.74,
  1938.06,
  1894.95,
  1869.71,
  1884.28,
  1833.84,
  1799.83,
  1828.93,
  1794.81,
  1720.65,
  1641.94,
  1653.95,
  1667.44,
  1696.75,
  1738.3,
  1686.72,
  1639.79,
  1626.95,
  1621.62,
  1602.96,
  1633.33,
  1638.6,
  1629.97,
  1646.28,
  1657.07,
  1651.4,
  1646.91,
  1680.58,
  1681.71,
  1697.25,
  1701.08,
  1691.14,
  1697.81,
  1681.72,
  1688.32,
  1682.53,
  1691.05,
  1713.24,
  1738.49,
  1796.66,
  1757.45,
  1744.57,
  1725.98,
  1712.54,
  1637.54,
  1648.13,
  1664.0,
  1664.46,
  1647.56,
  1639.83,
  1620.53,
  1598.73,
  1606.04,
  1592.68,
  1553.34,
  1551.9,
  1518.61,
  1523.43,
  1522.96,
  1505.21,
  1539.31,
  1517.09,
  1507.

In [6]:
# Creating a windowed datasets for testing, using the following window sizes (5, 10, 15, 20)
windowed_sizes = [5, 10, 15, 20]
windowed_data_test = [0, 0, 0, 0]

idx_counter = 0
for window_size in windowed_sizes:
    test_data = np.zeros((len(data_test) - window_size, window_size))
    for i in range(len(data_test) - window_size):
        test_data[i] = data_test[i:i + window_size]
    windowed_data_test[idx_counter] = test_data
    idx_counter += 1

print(f"Length of windowed_data_test: {len(windowed_data_test)}\n")
# Print the windowed data for each window sizes
idx_counter = 0
for items in windowed_data_test:
    print(f"Window Size: {windowed_sizes[idx_counter]} \
          \nwindowed_data_test_{windowed_sizes[idx_counter]}: \
          {windowed_data_test[idx_counter]}\n\n")
    idx_counter += 1

Length of windowed_data_test: 4

Window Size: 5           
windowed_data_test_5:           [[7186.71 7186.62 7233.29 7333.73 7350.58]
 [7186.62 7233.29 7333.73 7350.58 7399.18]
 [7233.29 7333.73 7350.58 7399.18 7369.44]
 ...
 [7035.76 6986.19 7027.38 6936.61 6881.26]
 [6986.19 7027.38 6936.61 6881.26 6923.08]
 [7027.38 6936.61 6881.26 6923.08 6842.79]]


Window Size: 10           
windowed_data_test_10:           [[7186.71 7186.62 7233.29 ... 7381.68 7451.37 7387.87]
 [7186.62 7233.29 7333.73 ... 7451.37 7387.87 7399.61]
 [7233.29 7333.73 7350.58 ... 7387.87 7399.61 7376.8 ]
 ...
 [7081.36 7042.7  7052.16 ... 7027.38 6936.61 6881.26]
 [7042.7  7052.16 6970.97 ... 6936.61 6881.26 6923.08]
 [7052.16 6970.97 6793.25 ... 6881.26 6923.08 6842.79]]


Window Size: 15           
windowed_data_test_15:           [[7186.71 7186.62 7233.29 ... 7447.02 7514.   7665.85]
 [7186.62 7233.29 7333.73 ... 7514.   7665.85 7701.38]
 [7233.29 7333.73 7350.58 ... 7665.85 7701.38 7773.32]
 ...
 [7094.86 7062.

In [7]:
y_data_test = [[], [], [], []]

# For y_data_test, in window_size = 5
for i in range(len(windowed_data_test[0])):
    y_data_test[0].append(windowed_data_test[0][i][1])

# For y_data_test, in window_size = 10
for i in range(len(windowed_data_test[1])):
    y_data_test[1].append(windowed_data_test[1][i][1])

# For y_data_test, in window_size = 15
for i in range(len(windowed_data_test[2])):
    y_data_test[2].append(windowed_data_test[2][i][1])

# For y_data_test, in window_size = 20
for i in range(len(windowed_data_test[3])):
    y_data_test[3].append(windowed_data_test[3][i][1])

y_data_test

[[7186.62,
  7233.29,
  7333.73,
  7350.58,
  7399.18,
  7369.44,
  7381.68,
  7451.37,
  7387.87,
  7399.61,
  7376.8,
  7447.02,
  7514.0,
  7665.85,
  7701.38,
  7773.32,
  7672.0,
  7838.22,
  7759.55,
  7819.39,
  7817.31,
  7725.85,
  7851.46,
  7820.71,
  7804.98,
  7635.27,
  7527.78,
  7540.92,
  7517.36,
  7583.52,
  7500.53,
  7632.26,
  7804.03,
  7766.47,
  7844.61,
  7830.96,
  7853.16,
  7855.71,
  7832.22,
  7881.82,
  7752.27,
  7638.71,
  7598.64,
  7596.15,
  7518.01,
  7449.2,
  7517.37,
  7413.15,
  7413.56,
  7286.34,
  7221.23,
  7134.73,
  7383.0,
  7433.61,
  7332.17,
  7268.21,
  7320.59,
  7276.82,
  7222.08,
  7132.36,
  7210.87,
  7093.34,
  7078.2,
  7050.82,
  7059.38,
  7001.14,
  6884.38,
  7004.77,
  6926.51,
  6987.02,
  7099.68,
  7141.25,
  7151.52,
  7236.16,
  7197.62,
  7129.42,
  6966.84,
  7064.33,
  7109.03,
  7016.06,
  7140.29,
  7213.44,
  7180.11,
  7033.93,
  7035.71,
  6968.82,
  6926.2,
  6843.83,
  6923.08,
  6952.59,
  7083.34,
  7270

In [8]:
# For each windowed_data_train, we will get the dynamic modes for each

# Intialize the list for the generated dynamic modes for each windowed_data_train items
dynamic_modes = [0, 0, 0, 0]

idx_counter = 0
for data_train in windowed_data_train:
    # Create a DMD object
    """
    About the parameters:

    svd_rank: 1
        Rank for truncation (stability of DMD against data noise)
        Read: https://arxiv.org/abs/2107.11999
        svd_rank is set to 1

    tlsq_rank: 1
        In connection to the suggesting reading for svd_rank,
        Total Least Square ranking will be used.

    exact: True
        Set to true to find the exact DMD

    opt: True
        Set to true to find the optimal DMD

    """
    dmd = DMD(svd_rank=1, tlsq_rank=1, exact=True, opt=True)
    # Fit the data
    dmd.fit(data_train.T)
    dynamic_modes[idx_counter] = dmd.modes
    idx_counter += 1

print(f"Dyanmic Modes: {dynamic_modes}")

Dyanmic Modes: [array([[-0.44710127],
       [-0.44722249],
       [-0.44733435],
       [-0.44743621],
       [-0.4475299 ]]), array([[-0.3159506 ],
       [-0.31603756],
       [-0.3161196 ],
       [-0.3161986 ],
       [-0.31627813],
       [-0.31635141],
       [-0.31642433],
       [-0.31649483],
       [-0.31656269],
       [-0.31662868]]), array([[-0.25778909],
       [-0.25787049],
       [-0.25794797],
       [-0.25802149],
       [-0.2580921 ],
       [-0.25815749],
       [-0.25822045],
       [-0.25828041],
       [-0.25833911],
       [-0.25839925],
       [-0.2584555 ],
       [-0.25851251],
       [-0.25856834],
       [-0.25862279],
       [-0.25867663]]), array([[-0.2230663 ],
       [-0.22314057],
       [-0.22321581],
       [-0.22328865],
       [-0.22336068],
       [-0.22342834],
       [-0.22349352],
       [-0.22355573],
       [-0.2236151 ],
       [-0.22367236],
       [-0.22372571],
       [-0.2237774 ],
       [-0.22382671],
       [-0.22387513],
       [-0

## Setting up the LSTM models

In [9]:
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=1_000)

In [10]:
# For window_size = 5
window_size = windowed_sizes[0]

model_s5 =  tf.keras.Sequential()
model_s5.add(tf.keras.layers.Dense(1, input_shape=(window_size,)))
model_s5.layers[0].set_weights([dynamic_modes[0], np.zeros(1)])
model_s5.layers[0].trainable = True

# Compile the model_s5
model_s5.compile(optimizer='adam',
                 loss='mse',
                 metrics=['mae', 
                          'mape', 
                          'mean_squared_error'])

# Train the model_s5
y_data_train0 = np.array(y_data_train[0])
y_data_test0 = np.array(y_data_test[0])
model_s5.fit(windowed_data_train[0], 
             y_data_train0, 
             epochs=10_000,
             verbose=0,
             validation_data=(windowed_data_test[0], y_data_test0),
             callbacks=[callback])

<keras.callbacks.History at 0x1f692917130>

In [11]:
# For window_size = 10
window_size = windowed_sizes[1]

model_s10 =  tf.keras.Sequential()
model_s10.add(tf.keras.layers.Dense(1, input_shape=(window_size,)))
model_s10.layers[0].set_weights([dynamic_modes[1], np.zeros(1)])
model_s10.layers[0].trainable = True

# Compile the model_s10
model_s10.compile(optimizer='adam',
                 loss='mse',
                 metrics=['mae', 
                          'mape', 
                          'mean_squared_error'])

# Train the model_s10
y_data_train1 = np.array(y_data_train[1])
y_data_test1 = np.array(y_data_test[1])
model_s10.fit(windowed_data_train[1], 
             y_data_train1, 
             epochs=10_000,
             verbose=0,
             validation_data=(windowed_data_test[1], y_data_test1),
             callbacks=[callback])

<keras.callbacks.History at 0x1f6927d6c10>

In [12]:
# For window_size = 15
window_size = windowed_sizes[2]

model_s15 =  tf.keras.Sequential()
model_s15.add(tf.keras.layers.Dense(1, input_shape=(window_size,)))
model_s15.layers[0].set_weights([dynamic_modes[2], np.zeros(1)])
model_s15.layers[0].trainable = True

# Compile the model_s15
model_s15.compile(optimizer='adam',
                 loss='mse',
                 metrics=['mae', 
                          'mape', 
                          'mean_squared_error'])

# Train the model_s15
y_data_train2 = np.array(y_data_train[2])
y_data_test2 = np.array(y_data_test[2])
model_s15.fit(windowed_data_train[2], 
             y_data_train2, 
             epochs=10_000,
             verbose=0,
             validation_data=(windowed_data_test[2], y_data_test2),
             callbacks=[callback])

<keras.callbacks.History at 0x1f697e29370>

In [13]:
# For window_size = 20
window_size = windowed_sizes[3]

model_s20 =  tf.keras.Sequential()
model_s20.add(tf.keras.layers.Dense(1, input_shape=(window_size,)))
model_s20.layers[0].set_weights([dynamic_modes[3], np.zeros(1)])
model_s20.layers[0].trainable = True

# Compile the model_s20
model_s20.compile(optimizer='adam',
                 loss='mse',
                 metrics=['mae', 
                          'mape', 
                          'mean_squared_error'])

# Train the model_s20
y_data_train3 = np.array(y_data_train[3])
y_data_test3 = np.array(y_data_test[3])
model_s20.fit(windowed_data_train[3], 
             y_data_train3, 
             epochs=10_000,
             verbose=0,
             validation_data=(windowed_data_test[3], y_data_test3),
             callbacks=[callback])

<keras.callbacks.History at 0x1f68fe5f8b0>

In [14]:
print("METRIC RESULTS")
print(f"For window_size = 5: {model_s5.get_metrics_result()}\n")
print(f"For window_size = 10: {model_s10.get_metrics_result()}\n")
print(f"For window_size = 15: {model_s15.get_metrics_result()}\n")
print(f"For window_size = 20: {model_s20.get_metrics_result()}\n")

METRIC RESULTS
For window_size = 5: {'loss': <tf.Tensor: shape=(), dtype=float32, numpy=0.00011331043>, 'mae': <tf.Tensor: shape=(), dtype=float32, numpy=0.008343128>, 'mape': <tf.Tensor: shape=(), dtype=float32, numpy=0.00012108202>, 'mean_squared_error': <tf.Tensor: shape=(), dtype=float32, numpy=0.00011331043>}

For window_size = 10: {'loss': <tf.Tensor: shape=(), dtype=float32, numpy=1.8361363>, 'mae': <tf.Tensor: shape=(), dtype=float32, numpy=1.3481805>, 'mape': <tf.Tensor: shape=(), dtype=float32, numpy=0.019301506>, 'mean_squared_error': <tf.Tensor: shape=(), dtype=float32, numpy=1.8361363>}

For window_size = 15: {'loss': <tf.Tensor: shape=(), dtype=float32, numpy=0.01658662>, 'mae': <tf.Tensor: shape=(), dtype=float32, numpy=0.121339634>, 'mape': <tf.Tensor: shape=(), dtype=float32, numpy=0.0017361421>, 'mean_squared_error': <tf.Tensor: shape=(), dtype=float32, numpy=0.01658662>}

For window_size = 20: {'loss': <tf.Tensor: shape=(), dtype=float32, numpy=0.13802193>, 'mae': <t

In [15]:
# Save Models
model_s5.save('exported_models/model_s5.keras')
model_s10.save('exported_models/model_s10.keras')
model_s15.save('exported_models/model_s15.keras')
model_s20.save('exported_models/model_s20.keras')