### Imports

In [1]:
import os
import numpy as np
import random
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore') 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # hopefully nothing explodes

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
print(tf.version)
print(tf.config.list_physical_devices('GPU'))

<module 'tensorflow._api.v2.version' from '/home/zyzz/anaconda3/lib/python3.11/site-packages/tensorflow/_api/v2/version/__init__.py'>
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# For reproducible results
seed = 42
os.environ['PYTHONHASHSEED']=str(seed)
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)  

## Data

In [3]:
# Constants
val_size = 0.2
data_path = 'training_dataset'
seq_length = 128     # predictions based on previous seq_length data entries
forecast_length = 9  # predicting forecast_length time steps into the future
sample_length = seq_length + forecast_length

In [4]:
# Read data (ignore categories)
training_data = np.load(os.path.join(data_path, 'training_data.npy'))
valid_periods = np.load(os.path.join(data_path, 'valid_periods.npy'))

# Filter out unvalid data
data = []
for i, row in enumerate(training_data):
    data.append(row[valid_periods[i][0]:valid_periods[i][1]])

print(f"({len(data)}, -)")

(48000, -)


In [5]:
# Convert time series to {x: sequences of length seq_length, y: values to be predicted from previous sequence}
def to_sequences(time_series):
    
    x = []
    y = []
    
    for i in range(time_series.shape[0]-seq_length-forecast_length+1):
        x.append(time_series[i:i+seq_length])
        y.append(time_series[i+seq_length:i+seq_length+forecast_length])  
    
    x = np.array(x)
    y = np.array(y)
    
    return {'x': x.reshape((x.shape[0], x.shape[1], -1)), 'y': y.reshape((y.shape[0], y.shape[1], -1))}

# Shuffle data (we don't want to make any assumptions about the order)
np.random.shuffle(data)
 
# Build sequences from the non-correlated time series, and append them to corresponding data set
# Note: there is no overlap between train and validation; each processed time series is used in train xor val
X_train, X_val = [], []
y_train, y_val = [], []
split_index = int((1-val_size)*len(data))
for i, time_series in enumerate(data): 
    if (len(time_series) >= sample_length): # assert we can draw at least one sample from the time_series
        sequences = to_sequences(time_series)
        if(i < split_index):
            X_train.append(sequences['x']) 
            y_train.append(sequences['y'])   
        else:
            X_val.append(sequences['x']) 
            y_val.append(sequences['y'])  

# Convert lists to nparrays 
X_train = np.concatenate(X_train, axis=0)
X_val = np.concatenate(X_val, axis=0)
y_train = np.concatenate(y_train, axis=0)
y_val = np.concatenate(y_val, axis=0)
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

(3373049, 128, 1) (847359, 128, 1) (3373049, 9, 1) (847359, 9, 1)


## ML

In [6]:
input_shape = (X_train.shape[1], X_train.shape[2])
output_shape = (y_train.shape[1], y_train.shape[2])
batch_size = 128
lstm_units = 128
filters = 256
epochs = 1000

def build_model(input_shape, filters, lstm_units):
    input_layer = tfkl.Input(shape=input_shape)
      
    # Extract features
    cnn = tfkl.Conv1D(filters, 3, padding='same', activation='relu')(input_layer)
    cnn = tfkl.MaxPooling1D()(cnn)
    cnn = tfkl.Conv1D(filters//2, 3, padding='same', activation='relu')(cnn)
    cnn = tfkl.MaxPooling1D()(cnn)
    
    # Extract time dependencies
    lstm = tfkl.LSTM(units=lstm_units)(cnn)
    output_layer = tfkl.Dense(units=forecast_length)(lstm)

    # Compile using MSE as loss function and the Adam optimizer
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='LSTM-v3')
    model.compile(optimizer='adam', loss='mse')

    return model

# Build model and display summary
model = build_model(input_shape, filters, lstm_units)
model.summary()

Model: "LSTM-v3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 1)]          0         
                                                                 
 conv1d (Conv1D)             (None, 128, 256)          1024      
                                                                 
 max_pooling1d (MaxPooling1  (None, 64, 256)           0         
 D)                                                              
                                                                 
 conv1d_1 (Conv1D)           (None, 64, 128)           98432     
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 32, 128)           0         
 g1D)                                                            
                                                                 
 lstm (LSTM)                 (None, 128)               1315

In [7]:
# Stop training when validation loss stops improving, maintain best weights
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,         # how many epochs to check for improvement before stopping
    restore_best_weights=True,
)
    
# Train model
history = model.fit(X_train,
                    y_train, 
                    batch_size=batch_size, 
                    epochs=epochs, 
                    validation_data=(X_val, y_val),
                    callbacks=early_stopping,
                    verbose=1)

# Evaluate on validation data
val_result = model.evaluate(X_val, y_val, verbose=0)
print(f"Val loss (MSE): {val_result}")

Epoch 1/1000


I0000 00:00:1702902482.644601     534 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Val loss (MSE): 0.005256765056401491


In [8]:
# Val loss for each prediction step
y_pred = model.predict(X_val)
for t in range(forecast_length):
    m = tf.keras.metrics.MeanSquaredError()
    m.update_state(y_val[:, t], y_pred[:, t])
    print(f'Val loss (MSE) {t+1}h forward: {m.result().numpy()}')

Val loss (MSE) 1h forward: 0.002577525097876787
Val loss (MSE) 2h forward: 0.0033835277426987886
Val loss (MSE) 3h forward: 0.004106225911527872
Val loss (MSE) 4h forward: 0.004802106413990259
Val loss (MSE) 5h forward: 0.0053682695142924786
Val loss (MSE) 6h forward: 0.005978988017886877
Val loss (MSE) 7h forward: 0.0065320273861289024
Val loss (MSE) 8h forward: 0.007024863269180059
Val loss (MSE) 9h forward: 0.0075372313149273396


In [9]:
# save model
model.save('LSTM_v3')

INFO:tensorflow:Assets written to: LSTM_v3/assets


INFO:tensorflow:Assets written to: LSTM_v3/assets
