In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback

# Load data set

In [None]:
# MinMax-scaled coordinate arrays, loaded from the working directory.
tmp_x = np.load("MinMax_scaled_x.npy")  # x coordinates
tmp_y = np.load("MinMax_scaled_y.npy")  # y coordinates
# Regression target (Lp); the file name suggests it is stored as a logarithm.
y = np.load("log_Lp.npy")
# Window size = length of the second axis of the coordinate array.
Lw = tmp_x.shape[1]

# Construct training & validation data sets

In [None]:
# Input data: pair the x and y coordinate arrays along a new trailing axis,
# giving shape (tmp_x.shape[0], tmp_x.shape[1], 2) with channel 0 = x and
# channel 1 = y.  One vectorised stack replaces the per-row Python loop.
# np.stack raises if tmp_x and tmp_y differ in shape, so a mismatch between
# the two files is reported here instead of surfacing later.
# The cast keeps the float64 dtype that the former np.zeros buffer had.
x = np.stack((tmp_x, tmp_y), axis=-1).astype(np.float64, copy=False)

In [None]:
# Split the data set into training (90 %) and validation (10 %) subsets.
# A seeded generator makes the shuffle -- and therefore the split -- identical
# on every Restart & Run All; an unseeded shuffle would hand the model a
# different validation set on each run.
SPLIT_SEED = 42
n_samples = x.shape[0]
n_train = int(n_samples*0.9)  # index of the first validation sample
IDX = np.random.default_rng(SPLIT_SEED).permutation(n_samples)
train_idx, val_idx = IDX[:n_train], IDX[n_train:]
train_x, train_y = x[train_idx], y[train_idx]
# Named test_* for compatibility with the training cell, where these arrays
# are passed as validation_data.
test_x, test_y = x[val_idx], y[val_idx]

# Implement transformer-based architecture

In [None]:
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual addition and
    layer normalisation (normalisation is applied after the residual sum).

    Args:
        embed_dim: Size of the last axis of the input. It is also used as the
            per-head ``key_dim`` of the attention layer and as the output width
            of the feed-forward network, so the residual sums are well-formed.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) feed-forward layer.
        dropout_rate: Dropout rate applied to the output of each sub-layer.
        **kwargs: Standard ``Layer`` keyword arguments (``name``, ``dtype``,
            ``trainable``), forwarded to the base class so that
            ``from_config`` can rebuild the layer.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, dropout_rate=0.1, **kwargs):
        super(TransformerBlock, self).__init__(**kwargs)
        # Remember the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(dropout_rate)
        self.dropout2 = layers.Dropout(dropout_rate)

    def call(self, inputs, training=False):
        """Apply the block to ``inputs`` and return a tensor of the same shape.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Forwarded to the two dropout layers.
        """
        # Self-attention sub-layer: query and value are the same tensor.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Position-wise feed-forward sub-layer.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised.

        A custom layer whose ``__init__`` takes arguments needs this for
        whole-model saving (e.g. by ``ModelCheckpoint``) and for reloading.
        """
        config = super(TransformerBlock, self).get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "dropout_rate": self.dropout_rate,
        })
        return config

class PositionalEncoding(layers.Layer):
    """Adds a learned positional embedding to its input.

    The embedding is a single trainable weight of shape
    ``(1, sequence_length, embed_dim)``, initialised to zeros and broadcast
    over the leading axis of the input when added.

    Args:
        sequence_length: Length of the embedding's second axis.
        embed_dim: Length of the embedding's last axis.
        **kwargs: Standard ``Layer`` keyword arguments (``name``, ``dtype``,
            ``trainable``), forwarded to the base class so that
            ``from_config`` can rebuild the layer.
    """

    def __init__(self, sequence_length, embed_dim, **kwargs):
        super(PositionalEncoding, self).__init__(**kwargs)
        # Remember the constructor arguments so get_config() can report them.
        self.sequence_length = sequence_length
        self.embed_dim = embed_dim
        self.position_embeddings = self.add_weight(
            shape=(1, sequence_length, embed_dim),
            initializer="zeros",
            trainable=True,
            name="pos_embedding"
        )

    def call(self, inputs):
        """Return ``inputs`` with the positional embedding added element-wise."""
        return inputs + self.position_embeddings

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised.

        A custom layer whose ``__init__`` takes arguments needs this for
        whole-model saving (e.g. by ``ModelCheckpoint``) and for reloading.
        """
        config = super(PositionalEncoding, self).get_config()
        config.update({
            "sequence_length": self.sequence_length,
            "embed_dim": self.embed_dim,
        })
        return config

def build_transformer_model(sequence_length, embed_dim=64, num_heads=64, ff_dim=128, num_layers=5, dropout_rate=0):
    """Assemble the Transformer regressor as a Keras functional model.

    Pipeline: linear projection of the 2-channel input to ``embed_dim``,
    learned positional embedding, ``num_layers`` TransformerBlock layers,
    average pooling over the sequence axis, and a single-unit ReLU head.

    Args:
        sequence_length: Number of positions in each input sequence.
        embed_dim: Width of the embedding used throughout the encoder stack.
        num_heads: Attention heads per TransformerBlock.
        ff_dim: Hidden width of each block's feed-forward network.
        num_layers: Number of stacked TransformerBlock layers.
        dropout_rate: Dropout rate handed to every TransformerBlock.

    Returns:
        An uncompiled ``keras.Model`` mapping ``(batch, sequence_length, 2)``
        inputs to ``(batch, 1)`` outputs.
    """
    inputs = keras.Input(shape=(sequence_length, 2))  # (batch_size, sequence_length, 2)

    # Embed each position, then inject learned position information.
    embedding = layers.Dense(embed_dim)
    hidden = embedding(inputs)
    hidden = PositionalEncoding(sequence_length, embed_dim)(hidden)

    # Encoder stack; each block is created and applied before the next one
    # is constructed.
    # NOTE(review): TransformerBlock uses key_dim=embed_dim per head, so with
    # the defaults every head is 64 wide -- confirm this is intended rather
    # than embed_dim // num_heads.
    for _ in range(num_layers):
        encoder_block = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)
        hidden = encoder_block(hidden)

    # Collapse the sequence axis and regress a single scalar.
    pooled = layers.GlobalAveragePooling1D()(hidden)
    # NOTE(review): the ReLU head cannot output negative values; confirm the
    # training target is never negative.
    outputs = layers.Dense(1, activation="relu")(pooled)

    return keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Build the network for sequences of length Lw, then compile it with the Adam
# optimizer (learning rate 10**-4) and the 'MAE' loss.
X = build_transformer_model(sequence_length=Lw)
X.compile(
    loss='MAE',
    optimizer=Adam(learning_rate=10**-4),
)
X.summary()

# Training setting

In [None]:
MODEL_SAVE_FOLDER_PATH = './'
model_path = f'{MODEL_SAVE_FOLDER_PATH}Model.hdf5'

# Model checkpoint: write the model to model_path only when val_loss reaches
# a new minimum.
cb_checkpoint = ModelCheckpoint(
    filepath=model_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Early stopping: stop after 50 epochs without a val_loss improvement and
# restore the best weights seen.
cb_early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=50,
    restore_best_weights=True,
)

# Reduce learning rate: scale it by 10**-0.5 after 15 epochs without a
# val_loss improvement, never going below 1e-7.
lr_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    factor=10**-0.5,
    patience=15,
    min_lr=1e-7,
)

In [None]:
# Train for up to 10000 epochs with a batch size of 2**10; the early-stopping
# callback is what normally ends the run.
# NOTE(review): `SaveHistory` is not defined or imported in any cell visible
# here, so this call raises NameError on a fresh kernel unless it is defined
# elsewhere -- confirm where it comes from.
history = X.fit(
    train_x,
    train_y,
    validation_data=(test_x, test_y),
    epochs=10000,
    batch_size=2**10,
    callbacks=[
        cb_checkpoint,
        cb_early_stopping,
        SaveHistory(),
        lr_reduction,
    ],
)