### Imports

In [1]:
import os
import numpy as np
import random
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore') 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # hopefully nothing explodes

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
print(tf.version)
print(tf.config.list_physical_devices('GPU'))

<module 'tensorflow._api.v2.version' from '/home/zyzz/anaconda3/lib/python3.11/site-packages/tensorflow/_api/v2/version/__init__.py'>
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# For reproducible results
seed = 42
os.environ['PYTHONHASHSEED']=str(seed)
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)  

## Data

In [3]:
# Constants
val_size = 0.2
data_path = 'training_dataset'
seq_length = 128     # predictions based on previous seq_length data entries
forecast_length = 9  # predicting forecast_length time steps into the future
sample_length = seq_length + forecast_length

In [4]:
# Read data
categories = np.load(os.path.join(data_path, 'categories.npy'))
training_data = np.load(os.path.join(data_path, 'training_data.npy'))
valid_periods = np.load(os.path.join(data_path, 'valid_periods.npy'))

print(categories.shape, training_data.shape, valid_periods.shape)

(48000,) (48000, 2776) (48000, 2)


In [5]:
# Create a dictionary mapping each category to corresponding data
data = {category: [] for category in np.unique(categories)}
for i, time_series in enumerate(training_data):
    data[categories[i]].append(time_series[valid_periods[i][0]:valid_periods[i][1]])

# Print amount of time series for each category
formatted_strings = [f"{category}: {len(time_series_list)}" for category, time_series_list in data.items()]
print(", ".join(formatted_strings))

A: 5728, B: 10987, C: 10017, D: 10016, E: 10975, F: 277


In [6]:
# Convert time series to {x: sequences of length seq_length, y: values to be predicted from previous sequence}
def to_sequences(time_series):
    
    x = []
    y = []
    
    for i in range(time_series.shape[0]-seq_length-forecast_length+1):
        x.append(time_series[i:i+seq_length])
        y.append(time_series[i+seq_length:i+seq_length+forecast_length])  
    
    x = np.array(x)
    y = np.array(y)
    
    return {'x': x.reshape((x.shape[0], x.shape[1], -1)), 'y': y.reshape((y.shape[0], y.shape[1], -1))}

In [7]:
for category in data.keys():
    
    # Shuffle the lists of time series (we don't want to make any assumptions about the order)
    random.shuffle(data[category])
    
    # Build sequences from the time series
    X, y = [], []
    for i, time_series in enumerate(data[category]): 
        if (len(time_series) >= sample_length): # assert we can draw at least one sample from the time_series
            sequences = to_sequences(time_series)
            X.append(sequences['x']) 
            y.append(sequences['y'])   

    # Build our data sets                
    # Note: there is no overlap between train and validation; each processed time series is used in train xor val    
    split_index = int((1-val_size)*len(X))
    X_train = np.concatenate(X[0:split_index], axis=0)
    X_val = np.concatenate(X[split_index:], axis=0)
    y_train = np.concatenate(y[0:split_index], axis=0)
    y_val = np.concatenate(y[split_index:], axis=0)
    print(category, ': ', X_train.shape, X_val.shape, y_train.shape, y_val.shape)
    
    # Replace the list of time series with a dictionary with the data sets
    data[category] = {'X_train': X_train, 'X_val': X_val, 'y_train': y_train, 'y_val': y_val}

A :  (661911, 128, 1) (161525, 128, 1) (661911, 9, 1) (161525, 9, 1)
B :  (525289, 128, 1) (130754, 128, 1) (525289, 9, 1) (130754, 9, 1)
C :  (729993, 128, 1) (184536, 128, 1) (729993, 9, 1) (184536, 9, 1)
D :  (860625, 128, 1) (211567, 128, 1) (860625, 9, 1) (211567, 9, 1)
E :  (586833, 128, 1) (144234, 128, 1) (586833, 9, 1) (144234, 9, 1)
F :  (17665, 128, 1) (5476, 128, 1) (17665, 9, 1) (5476, 9, 1)


## ML

In [8]:
input_shape = (X_train.shape[1], X_train.shape[2])
dropout_rate = 0.2
batch_size = 128
lstm_units = 128
epochs = 1000

def build_model(input_shape, lstm_units, dropout_rate):
    input_layer = tfkl.Input(shape=input_shape)
      
    # Block one; bidirectional LSTMs with regularizers, BNormalization and dropout
    x = tfkl.Bidirectional(
        tfkl.LSTM(units=lstm_units, return_sequences=True, kernel_regularizer=tfk.regularizers.l2(0.001)))(input_layer)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Dropout(dropout_rate)(x)

    # Block two; similar to previous but no bidirectionality or regularizer
    x = tfkl.LSTM(units=lstm_units // 2)(x)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Dropout(dropout_rate)(x)
    x = tfkl.Dense(units=forecast_length)(x)

    # Compile using MSE as loss function and the Adam optimizer
    model = tfk.Model(inputs=input_layer, outputs=x)
    model.compile(optimizer='adam', loss='mse')

    return model

In [9]:
# Stop training when validation loss stops improving, maintain best weights
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,         # how many epochs to check for improvement before stopping
    restore_best_weights=True,
)
    
final_val_losses = {}
final_val_losses_per_step = {}

for category in data.keys():
    print(f"Training model for CATEGORY {category}...")
    
    # Build and train mode   
    model = build_model(input_shape, lstm_units, dropout_rate)
    history = model.fit(data[category]['X_train'],
                        data[category]['y_train'], 
                        batch_size=batch_size, 
                        epochs=epochs, 
                        validation_data=(data[category]['X_val'], data[category]['y_val']),
                        callbacks=early_stopping,
                        verbose=1)
   
    # Save model
    model.save(os.path.join('LSTM_v2', category))

    # Evaluate on validation data
    val_result = model.evaluate(data[category]['X_val'], data[category]['y_val'], verbose=0)
    final_val_losses[category] = val_result
    
    # Val loss for each prediction step
    final_val_losses_per_step[category] = []
    y_pred = model.predict(data[category]['X_val'])
    for t in range(forecast_length):
        mse = tf.keras.metrics.MeanSquaredError()
        mse.update_state(data[category]['y_val'][:, t], y_pred[:, t])
        final_val_losses_per_step[category].append(mse.result().numpy())

Training model for CATEGORY A...
Epoch 1/1000


I0000 00:00:1702637499.167111   10189 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
INFO:tensorflow:Assets written to: LSTM_v2/A/assets


INFO:tensorflow:Assets written to: LSTM_v2/A/assets


Training model for CATEGORY B...
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
INFO:tensorflow:Assets written to: LSTM_v2/B/assets


INFO:tensorflow:Assets written to: LSTM_v2/B/assets


Training model for CATEGORY C...
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
INFO:tensorflow:Assets written to: LSTM_v2/C/assets


INFO:tensorflow:Assets written to: LSTM_v2/C/assets


Training model for CATEGORY D...
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
INFO:tensorflow:Assets written to: LSTM_v2/D/assets


INFO:tensorflow:Assets written to: LSTM_v2/D/assets


Training model for CATEGORY E...
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
INFO:tensorflow:Assets written to: LSTM_v2/E/assets


INFO:tensorflow:Assets written to: LSTM_v2/E/assets


Training model for CATEGORY F...
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
INFO:tensorflow:Assets written to: LSTM_v2/F/assets


INFO:tensorflow:Assets written to: LSTM_v2/F/assets




In [10]:
# Print stats for each model 
for category in data.keys():    
    print(f"Category {category} val MSE: {final_val_losses[category]}")
    print(f"Val MSE per step: {final_val_losses_per_step[category]}")
    print()

Category A val MSE: 0.0056684562005102634
Val MSE per step: [0.0026399393, 0.0036647862, 0.004517883, 0.005260236, 0.005812976, 0.0064202137, 0.006928379, 0.0074861846, 0.00792583]

Category B val MSE: 0.007554300129413605
Val MSE per step: [0.003924943, 0.0049917274, 0.0059011364, 0.006628354, 0.0074954876, 0.008308838, 0.009189348, 0.01009551, 0.011059475]

Category C val MSE: 0.005262885242700577
Val MSE per step: [0.0024113196, 0.0032250553, 0.003870887, 0.00467309, 0.005444388, 0.005980557, 0.0065766517, 0.0071365065, 0.0077085826]

Category D val MSE: 0.0054681869223713875
Val MSE per step: [0.0031582091, 0.003871543, 0.0043933876, 0.0049927225, 0.0055502984, 0.006025779, 0.00656905, 0.006937013, 0.0074148024]

Category E val MSE: 0.005030099768191576
Val MSE per step: [0.0021856995, 0.0029887406, 0.0037141447, 0.004399944, 0.005032844, 0.00565759, 0.0063287932, 0.00698681, 0.007628505]

Category F val MSE: 0.005575011018663645
Val MSE per step: [0.0020815833, 0.0030270952, 0.003

In [11]:
# Print average val loss
print(f"Avg val MSE: {sum(final_val_losses.values())/len(data.keys())}")

Avg val MSE: 0.005759823213641842
