# Transformer

In [None]:
# Imports

In [None]:
import math

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

def scheduler(epoch, lr):
    """Return the learning rate to use for the given epoch.

    The rate is left untouched for epochs below 100. From epoch 100 onward
    it is multiplied by ``exp(-0.3)`` on every epoch whose index is a
    multiple of 10, and left untouched on all other epochs.

    The decay factor is computed with ``math.exp`` so that the result is a
    plain Python float whenever ``lr`` is one: the Keras 3
    ``LearningRateScheduler`` callback rejects tensor return values, while
    floats are accepted by both Keras 3 and older ``tf.keras`` releases.

    Args:
        epoch: Epoch index passed in by the ``LearningRateScheduler`` callback.
        lr: Learning rate currently in use.

    Returns:
        The (possibly decayed) learning rate.
    """
    if epoch >= 100 and epoch % 10 == 0:
        return lr * math.exp(-0.3)
    return lr

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one Transformer encoder block to ``inputs``.

    The block consists of a self-attention sub-layer followed by a
    position-wise feed-forward sub-layer. Each sub-layer normalizes its
    input first and adds its input back to its output (residual connection).

    Args:
        inputs: Tensor to transform; its last dimension determines the
            number of filters of the final projection, so the output has
            the same last dimension as the input.
        head_size: Passed to ``MultiHeadAttention`` as ``key_dim``.
        num_heads: Number of attention heads.
        ff_dim: Number of filters of the first (ReLU) 1x1 convolution in the
            feed-forward part.
        dropout: Dropout rate used inside the attention layer, after the
            attention layer, and between the two convolutions.

    Returns:
        The output tensor of the block.
    """
    # --- Self-attention sub-layer (query and value are the same tensor) ---
    normed_inputs = layers.LayerNormalization(epsilon=1e-4)(inputs)
    self_attention = layers.MultiHeadAttention(
        num_heads=num_heads,
        key_dim=head_size,
        dropout=dropout,
    )
    attended = self_attention(normed_inputs, normed_inputs)
    attended = layers.Dropout(dropout)(attended)
    attention_out = attended + inputs

    # --- Feed-forward sub-layer, built from kernel-size-1 convolutions ---
    hidden = layers.LayerNormalization(epsilon=1e-4)(attention_out)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    # Project back to the input width so the residual addition is valid.
    projected = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(hidden)
    return projected + attention_out


def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    """Assemble the Transformer regression network as a Keras model.

    The network stacks ``num_transformer_blocks`` encoder blocks, pools the
    result, runs it through a ReLU MLP and ends in a single linear unit.

    Args:
        input_shape: Shape of one sample (without the batch dimension).
        head_size: Forwarded to every encoder block.
        num_heads: Forwarded to every encoder block.
        ff_dim: Forwarded to every encoder block.
        num_transformer_blocks: How many encoder blocks to stack.
        mlp_units: Iterable with the width of each dense layer of the MLP
            head, in order.
        dropout: Dropout rate used inside the encoder blocks.
        mlp_dropout: Dropout rate applied after each dense layer of the
            MLP head.

    Returns:
        An uncompiled ``tf.keras.Model`` with one output unit.
    """
    model_input = tf.keras.Input(shape=input_shape)

    # Encoder stack: each block consumes the output of the previous one.
    encoded = model_input
    for _block in range(num_transformer_blocks):
        encoded = transformer_encoder(
            encoded,
            head_size=head_size,
            num_heads=num_heads,
            ff_dim=ff_dim,
            dropout=dropout,
        )

    # NOTE: with data_format="channels_first" the pooling layer averages
    # over the last axis of its input (see the Keras docs for this layer).
    pooling = layers.GlobalAveragePooling1D(data_format="channels_first")
    head = pooling(encoded)

    # MLP head: Dense(ReLU) followed by dropout, once per requested width.
    for width in mlp_units:
        head = layers.Dense(width, activation="relu")(head)
        head = layers.Dropout(mlp_dropout)(head)

    prediction = layers.Dense(1)(head)
    return tf.keras.Model(inputs=model_input, outputs=prediction)


# Implement a transformer model on the dataset
def transformer_model(X_train, y_train, X_test, y_test, n_epochs, num_blocks, num_heads):
    """Build, compile and fit the Transformer regressor.

    Only the first column of ``y_train`` / ``y_test`` is used as the target.
    ``X_test`` / ``y_test`` are used as validation data during fitting.

    Args:
        X_train: Training inputs; ``X_train.shape[1:]`` is the model's
            input shape.
        y_train: Training targets, indexed as ``y_train[:, 0]``.
        X_test: Validation inputs.
        y_test: Validation targets, indexed as ``y_test[:, 0]``.
        n_epochs: Number of epochs to train for.
        num_blocks: Number of encoder blocks in the model.
        num_heads: Number of attention heads per block.

    Returns:
        The fitted Keras model.
    """
    network = build_model(
        input_shape=X_train.shape[1:],
        head_size=8,
        num_heads=num_heads,
        ff_dim=64,
        num_transformer_blocks=num_blocks,
        mlp_units=[32],
        # Original author's note: use more dropout the more blocks are used.
        dropout=0.1,
        mlp_dropout=0.1,
    )
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.002),
        loss="mse",
    )

    # Learning-rate decay is delegated to the module-level `scheduler`.
    lr_callback = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1)

    # Validation data is served as a batched tf.data pipeline.
    validation_batches = tf.data.Dataset.from_tensor_slices(
        (X_test, y_test[:, 0])
    ).batch(128)

    network.fit(
        x=X_train,
        y=y_train[:, 0],
        batch_size=32,
        epochs=n_epochs,
        callbacks=[lr_callback],
        validation_data=validation_batches,
    )
    return network

    

def transformer_fitting_and_evaluation(features, target, dataset_name, model_file_identifier="_", num_blocks=2, num_heads=8):
    """Load or train the Transformer for a dataset and score it on the test set.

    The final 2160 rows are held out as the test set without shuffling.
    Features and target are standardized with statistics fitted on the
    training part only. A model previously saved under
    ``transformer/<model_file_identifier>/<dataset_name>`` is reused when it
    can be loaded; otherwise a new model is trained for 150 epochs and saved
    to that path. In both cases the model is then evaluated on the test set
    and the MAE / MAPE are printed.

    Args:
        features: 2-D feature table, one row per sample.
        target: Target values aligned with ``features``; must expose
            ``.values`` (e.g. a pandas Series).
        dataset_name: Name used in the model path and in the printed report.
        model_file_identifier: Sub-directory name that separates different
            model configurations.
        num_blocks: Number of encoder blocks when a new model is trained.
        num_heads: Number of attention heads when a new model is trained.

    Returns:
        The mean absolute percentage error on the test set, in original
        (unscaled) target units.
    """
    # Test set is the last 90 days (2160 rows; presumably hourly samples).
    # shuffle=False keeps the rows in their original order.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=2160, shuffle=False
    )

    # Fit the scalers on the training split only, then apply them to both.
    Xscaler = StandardScaler()
    scaled_X_train = Xscaler.fit_transform(X_train)
    scaled_X_test = Xscaler.transform(X_test)
    Yscaler = StandardScaler()
    scaled_y_train = Yscaler.fit_transform(y_train.values.reshape(-1, 1))
    scaled_y_test = Yscaler.transform(y_test.values.reshape(-1, 1))

    # Add a trailing axis of size 1: (samples, features) -> (samples, features, 1).
    transformer_X_train = np.expand_dims(scaled_X_train, axis=-1)
    transformer_X_test = np.expand_dims(scaled_X_test, axis=-1)

    epochs = 150
    model_file_path = "transformer/" + model_file_identifier + "/" + dataset_name

    # Reuse a saved model when one can be loaded. Only the errors load_model
    # raises for a missing/unreadable path are treated as "no saved model";
    # anything else (including KeyboardInterrupt) propagates instead of
    # silently triggering a full retraining run.
    try:
        model = tf.keras.models.load_model(model_file_path)
    except (OSError, ValueError):
        model = None

    if model is None:
        model = transformer_model(
            transformer_X_train,
            scaled_y_train,
            transformer_X_test,
            scaled_y_test,
            epochs,
            num_blocks,
            num_heads,
        )
        model.save(model_file_path)

    # Evaluate on the test set (also for a freshly trained model), mapping
    # predictions back to the original target scale first.
    y_pred_scaled = model.predict(transformer_X_test)
    y_pred = Yscaler.inverse_transform(y_pred_scaled)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    print(f'{dataset_name}, MAE: {mae:.2f}, MAPE: {mape:.3f}')

    return mape
    

## Run some tests

In [None]:
# Hyperparameter tuning: number of blocks in [1, 2], number of heads in [8, 32, 64]
# Also try different lookback horizons