In [1]:
import numpy as np
import tensorflow as tf
import os

2025-06-10 11:35:32.014384: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-10 11:35:32.051698: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-10 11:35:32.051725: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-10 11:35:32.052635: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-10 11:35:32.059150: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# confirm that TensorFlow can see a GPU before building the model
tf.config.list_physical_devices(device_type="GPU")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# list the file names in the training-data directory
# (sorted, because os.listdir returns entries in arbitrary order)
all_train_data = sorted(os.listdir("seq_2_seq_train_data"))

In [4]:
# get the inputs and targets separated
# The stem is everything before the first '.', e.g. 'block_0101_extracted_input_features';
# matching on its suffix avoids hand-counted slice lengths that must track the literal.
all_train_input_files = [file for file in all_train_data if file.split('.')[0].endswith('input_features')]
all_train_target_files = [file for file in all_train_data if file.split('.')[0].endswith('target_features')]

In [5]:
# order the input files by name; the trailing expression displays the result
all_train_input_files = sorted(all_train_input_files)
all_train_input_files

['block_0101_extracted_input_features.npy',
 'block_0102_extracted_input_features.npy',
 'block_0203_extracted_input_features.npy',
 'block_0301_extracted_input_features.npy']

In [6]:
# order the target files by name; the trailing expression displays the result
all_train_target_files = sorted(all_train_target_files)
all_train_target_files

['block_0101_extracted_target_features.npy',
 'block_0102_extracted_target_features.npy',
 'block_0203_extracted_target_features.npy',
 'block_0301_extracted_target_features.npy']

In [7]:
# load the files
# Inputs and targets are matched purely by list position, so verify that the
# two file lists line up name-for-name before loading; otherwise X and y would
# be silently misaligned.
assert all_train_input_files, "no training input feature files found"
expected_target_files = [file.replace("input_features", "target_features") for file in all_train_input_files]
assert all_train_target_files == expected_target_files, (
    "input/target feature files are not paired one-to-one in the same order"
)

loaded_all_train_input_files = [np.load(os.path.join("seq_2_seq_train_data", file)) for file in all_train_input_files]
loaded_all_train_target_files = [np.load(os.path.join("seq_2_seq_train_data", file)) for file in all_train_target_files]

In [8]:
# join the per-file input arrays along the first (sample) axis
X_train = np.concatenate(loaded_all_train_input_files, axis=0)

In [9]:
# sanity check: dimensions of the stacked training inputs
np.shape(X_train)

(3640, 13, 32)

In [10]:
# join the per-file target arrays along the first (sample) axis
y_train = np.concatenate(loaded_all_train_target_files, axis=0)

In [11]:
# sanity check: dimensions of the stacked training targets
np.shape(y_train)

(3640, 7, 32)

In [12]:
# list the files available in the validation-data directory
# (sorted, because os.listdir returns entries in arbitrary order)
sorted(os.listdir("seq_2_seq_valid_data"))

['block_0204_extracted_input_features.npy',
 'block_0204_extracted_target_features.npy']

In [13]:
# validation inputs come from a single block that is not part of the training files
valid_input_path = os.path.join("seq_2_seq_valid_data", "block_0204_extracted_input_features.npy")
X_valid = np.load(valid_input_path)

In [14]:
# sanity check: dimensions of the validation inputs
np.shape(X_valid)

(910, 13, 32)

In [15]:
# validation targets for the same block as the validation inputs
valid_target_path = os.path.join("seq_2_seq_valid_data", "block_0204_extracted_target_features.npy")
y_valid = np.load(valid_target_path)

In [16]:
# sanity check: dimensions of the validation targets
np.shape(y_valid)

(910, 7, 32)

In [17]:
# Define the model
def create_sequence_to_sequence_model(input_timesteps, input_features, output_timesteps,
                                      output_features=None, units=64, learning_rate=0.001):
    """Build and compile an LSTM encoder-decoder that maps a sequence to a sequence.

    The encoder LSTM reads the whole input window and returns its last output
    together with its final hidden and cell states. That last output is
    repeated ``output_timesteps`` times to form the decoder input, the decoder
    LSTM starts from the encoder's final states, and a Dense layer applied to
    every decoder step projects to the target feature dimension.

    Args:
        input_timesteps: Number of time steps in each input sequence.
        input_features: Number of features per input time step.
        output_timesteps: Number of time steps to predict.
        output_features: Number of features per predicted time step. ``None``
            (the default) uses ``input_features``, i.e. the previous behaviour.
        units: Width of both LSTM layers. Encoder and decoder share it because
            the encoder states are passed as the decoder's initial state.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``tf.keras`` model taking ``(batch, input_timesteps,
        input_features)`` and producing ``(batch, output_timesteps,
        output_features)``, trained with mean squared error and reporting
        mean absolute error.
    """
    if output_features is None:
        output_features = input_features

    # Input layer for the encoder
    inputs = tf.keras.layers.Input(shape=(input_timesteps, input_features))

    # Encoder LSTM: keep only the last output plus the final hidden/cell states.
    # NOTE(review): activation='relu' replaces the default tanh; per the Keras
    # LSTM docs the cuDNN fast path requires tanh - confirm this is intentional.
    encoder = tf.keras.layers.LSTM(units, activation='relu', return_state=True, return_sequences=False)
    encoder_outputs, state_h, state_c = encoder(inputs)

    # Repeat the encoder's last output once per step to be predicted; this is
    # the sequence the decoder consumes.
    decoder_input = tf.keras.layers.RepeatVector(output_timesteps)(encoder_outputs)

    # Decoder LSTM, initialised with the encoder's final states, emitting one
    # vector per output time step.
    decoder_lstm = tf.keras.layers.LSTM(units, activation='relu', return_sequences=True)
    decoder_outputs = decoder_lstm(decoder_input, initial_state=[state_h, state_c])

    # Dense layer applied at each decoder step to produce the predicted features
    outputs = tf.keras.layers.Dense(output_features)(decoder_outputs)

    # Create the model
    model = tf.keras.models.Model(inputs, outputs)

    # Compile the model
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='mean_squared_error', optimizer=opt, metrics=['mean_absolute_error'])

    return model

# Take the sequence dimensions from the data itself so the model always
# matches the arrays it is trained on (no hand-maintained constants).
input_timesteps = X_train.shape[1]   # 13 time periods per input window
input_features = X_train.shape[2]    # 32 features per time period
output_timesteps = y_train.shape[1]  # predict the next 7 time periods

# Called with these three arguments, the model emits `input_features` values
# per predicted step, so the targets must have the same feature width.
assert y_train.shape[2] == input_features, "target feature count must equal input feature count"

# Create the model
model = create_sequence_to_sequence_model(input_timesteps, input_features, output_timesteps)

# Summarize the model
model.summary()




2025-06-10 11:35:33.920282: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 495 MB memory:  -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:06:00.0, compute capability: 7.0


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 13, 32)]             0         []                            
                                                                                                  
 lstm (LSTM)                 [(None, 64),                 24832     ['input_1[0][0]']             
                              (None, 64),                                                         
                              (None, 64)]                                                         
                                                                                                  
 repeat_vector (RepeatVecto  (None, 7, 64)                0         ['lstm[0][0]']                
 r)                                                                                           

In [18]:
# Early stopping: watch the validation loss, stop after 15 epochs without
# improvement, and roll the model back to its best weights.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)

In [19]:
# Fit on the training arrays and evaluate on the validation arrays after each
# epoch; the `es` callback is passed in to control when training stops.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=1000,
    epochs=300,
    callbacks=[es],
    validation_data=(X_valid, y_valid),
    validation_batch_size=1000,
)

Epoch 1/300


2025-06-10 11:35:36.385033: I external/local_xla/xla/service/service.cc:168] XLA service 0x5606c0b68ca0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-06-10 11:35:36.385064: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): Tesla V100S-PCIE-32GB, Compute Capability 7.0
2025-06-10 11:35:36.392252: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-06-10 11:35:36.407786: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907
I0000 00:00:1749573336.502392 3861112 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 7

In [20]:
# run the trained model on the validation inputs to confirm inference works
check_valid_preds = model.predict(x=X_valid)



In [21]:
# sanity check: dimensions of the validation predictions
np.shape(check_valid_preds)

(910, 7, 32)

In [26]:
# save the model
# Make sure the target directory exists first so the save also works on a
# fresh checkout where "models/" has not been created yet.
os.makedirs("models", exist_ok=True)
model.save("models/seq_2_seq_generic_model.keras")

In [27]:
# overall mean of the validation predictions, kept as a reference value for later sanity checks
np.mean(check_valid_preds)

0.3711441