In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras import Model
from tensorflow.keras.backend import clear_session
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.layers import Input, GlobalAveragePooling1D, Dropout, Dense
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.preprocessing.sequence import pad_sequences

from transformer import TransformerBlock, TokenAndPositionEmbedding
from utils import load_dataset, train

In [None]:
# Key handed to the project's `load_dataset` helper; the helper returns the
# four arrays unpacked below (train features/targets, test features/targets).
DATASET_NAME = 'm2'
X_train, y_train, X_test, y_test = load_dataset(DATASET_NAME)

In [None]:
# --- Hyperparameters ---------------------------------------------------------
maxlen = 1000    # length of every input sequence (see the Input shape below)
sample_size = 4  # second argument of TokenAndPositionEmbedding
                 # NOTE(review): presumably the vocabulary size -- confirm in transformer.py
embed_dim = 4    # embedding size for each token
num_heads = 2    # number of attention heads
ff_dim = 64      # hidden layer size of the feed-forward network inside the transformer

# Drop any Keras state left over from a previous run of this cell.
clear_session()

# --- Encoder: token + position embedding followed by one transformer block ---
inputs = Input(shape=(maxlen,))

embedding_layer = TokenAndPositionEmbedding(maxlen, sample_size, embed_dim)
embedded = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
encoded = transformer_block(embedded)

# --- Head: pool, then a small dense stack with light dropout -----------------
pooled = GlobalAveragePooling1D()(encoded)
hidden = Dropout(0.1)(pooled)
hidden = Dense(32, activation='relu')(hidden)
hidden = Dropout(0.1)(hidden)
# NOTE(review): ReLU on the single output unit clamps predictions to >= 0;
# confirm that this matches the range of the targets.
outputs = Dense(1, activation='relu')(hidden)

model = Model(inputs=inputs, outputs=outputs)

# model.summary()

In [None]:
# Learning-rate schedule settings.
# NOTE(review): this cell originally referenced `schedule` without defining it
# anywhere in the notebook, so it failed with NameError on a fresh kernel.
# The schedule below is a conservative stand-in; tune or replace these values
# with the schedule that was actually intended.
LR_HOLD_EPOCHS = 10   # epochs during which the initial learning rate is kept
LR_DECAY = 0.99       # multiplicative decay applied each epoch afterwards
LR_FLOOR = 1e-5       # the learning rate is never reduced below this value


def schedule(epoch, lr):
    """Return the learning rate to use for `epoch`.

    Args:
        epoch: Zero-based epoch index supplied by `LearningRateScheduler`.
        lr: Learning rate currently set on the optimizer.

    Returns:
        `lr` unchanged for the first `LR_HOLD_EPOCHS` epochs; afterwards
        `lr * LR_DECAY`, clipped from below at `LR_FLOOR`. Always a Python
        float, which is what the callback expects.
    """
    if epoch < LR_HOLD_EPOCHS:
        return float(lr)
    return float(max(lr * LR_DECAY, LR_FLOOR))


scheduler = LearningRateScheduler(schedule)
# Early stopping watches the training loss ('loss'), not a validation metric.
# NOTE(review): presumably because validation only runs every
# `validation_freq` epochs -- confirm this is intentional.
es = EarlyStopping(monitor='loss', patience=15, verbose=1)
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras releases.
optimizer = Adam(learning_rate=1e-3)

epochs = 1500          # upper bound; early stopping may end training sooner
validation_freq = 5    # forwarded to `train` below

In [None]:
# Gather everything the project's `train` helper needs in one place, then
# hand it over in a single call.
train_kwargs = dict(
    dataset=(X_train, y_train, X_test, y_test),
    model=model,
    epochs=epochs,
    validation_freq=validation_freq,
    optimizer=optimizer,
    callbacks=[scheduler, es],
)

# `train` returns the model object, which replaces the untrained `model`
# binding used by the cells below.
model = train(**train_kwargs)

In [None]:
# Score the model on the test arrays; the bare name on the last line makes
# the notebook display the returned value.
test_metrics = model.evaluate(X_test, y_test)
test_metrics

In [None]:
# Where the trained model is written (HDF5 file, relative to the notebook).
MODEL_SAVE_PATH = '../model_saves/transformer-model.h5'
model.save(MODEL_SAVE_PATH)