In [1]:
import os
from os import path

import tensorflow as tf
from tensorflow.keras.layers import TextVectorization

from loader import pipeline
from models.custom_metrics.metrics import loss_function, accuracy_function
from models.transformer.transformer import Transformer
from translator.translator import TranslatorWithBeamSearch

In [2]:
# File locations and batch size used throughout the notebook.
# `os.path.join` is used instead of the `path` alias imported at the top:
# the assignment to `path` below rebinds that alias to a string, so calling
# `path.join(...)` would raise a TypeError when this cell is run a second time.
data_path = os.path.join("dataset", "dialogs.txt")
# NOTE: the name `path` is kept because a later cell passes it to the
# embedding loader; after this line `path` is a string, not the os.path module.
path = os.path.join("dataset", "glove.6B.100d.txt")
BATCH_SIZE = 128

In [3]:
# Create the training dataset, using the batch size from the configuration
# cell rather than repeating the literal value.
train = pipeline.create_dataset(data_path, BATCH_SIZE=BATCH_SIZE)

# Text-to-token-id layer, capped at 5000 vocabulary entries and using the
# project's standardization callable.
vectorizer = TextVectorization(
    max_tokens=5000,
    standardize=pipeline.add_start_and_end_tokens,
)
# NOTE(review): the vocabulary is adapted on the "question" field only, yet
# the same vectorizer is later applied to the "answer" field as well; words
# that appear only in answers would map to the out-of-vocabulary id — confirm
# this is intended.
vectorizer.adapt(train.map(lambda x: x["question"]))
vocab = vectorizer.get_vocabulary()

In [None]:
# --- Model architecture -----------------------------------------------------
num_layers, num_heads = 4, 4
# NOTE(review): presumably has to agree with the 100-d GloVe file referenced
# in the configuration cell — confirm before changing.
model_dim = 100
dff = 256
dropout_rate = 0.2
vocab_len = len(vocab)  # size of the vocabulary learned by the vectorizer
MAX_TOKENS = 32

# --- Optimisation -----------------------------------------------------------
EPOCHS = 100
# INIT_LR / MAX_LR are defined here but not referenced by the visible cells.
INIT_LR, MAX_LR = 1e-4, 1e-2

In [None]:
# Instantiate the model from the hyperparameters defined above; the same
# vocabulary size is used for both the input and the target side.
transformer = Transformer(
    input_vocab_size=vocab_len, target_vocab_size=vocab_len,
    max_tokens=MAX_TOKENS,
    model_dim=model_dim, num_layers=num_layers,
    num_heads=num_heads, dff=dff,
    dropout_rate=dropout_rate,
)

In [None]:
# Hand the GloVe file location and the vectorizer vocabulary to the model.
# `path` here is the string assigned in the configuration cell (the GloVe
# file path), not the `os.path` module imported at the top.
# NOTE(review): presumably this initialises the embedding weights from the
# pre-trained vectors; `_load_embedding` is a private method — confirm it is
# meant to be called from outside the class.
transformer._load_embedding(path,vocab)

In [None]:
# Running means used to aggregate the per-batch loss and accuracy values
# reported by the training step.
train_loss, train_accuracy = (
    tf.keras.metrics.Mean(name=metric_name)
    for metric_name in ('train_loss', 'train_accuracy')
)

In [None]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Learning-rate schedule with a warm-up phase followed by decay.

    For a given step the rate is::

        rsqrt(model_dim) * min(rsqrt(step), step * warmup_steps ** -1.5)

    so it grows linearly with ``step`` while ``step < warmup_steps`` and
    afterwards decays with the inverse square root of ``step``.

    Args:
        model_dim: Model dimensionality; stored as a float32 tensor.
        warmup_steps: Number of steps over which the rate increases.
    """

    def __init__(self, model_dim, warmup_steps=1000):
        super(CustomSchedule, self).__init__()

        self.model_dim = tf.cast(model_dim, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        """Return the learning rate for ``step`` as a float32 tensor."""
        # The optimizer may pass its integer iteration counter; rsqrt is only
        # defined for floating-point inputs, so cast before doing any math.
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.model_dim) * tf.math.minimum(arg1, arg2)


# Adam optimizer whose learning rate follows the custom schedule.
learning_rate = CustomSchedule(model_dim)
optimizer = tf.keras.optimizers.Adam(
    learning_rate=learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

In [None]:
# Fixed input signature for the training step: two rank-2 int64 tensors with
# both dimensions left unspecified, so differently sized batches can be passed
# under the same signature.
train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int64)
    for _ in range(2)
]


@tf.function(input_signature=train_step_signature)
def train_step(inp, tar):
    """Run one optimisation step and update the running metrics.

    Args:
        inp: Rank-2 int64 tensor of input token ids.
        tar: Rank-2 int64 tensor of target token ids.

    The model is fed ``tar`` without its last position and its predictions are
    compared against ``tar`` without its first position. Gradients of the loss
    are applied to the transformer's trainable variables, and ``train_loss`` /
    ``train_accuracy`` are updated. Nothing is returned.
    """
    decoder_input, expected = tar[:, :-1], tar[:, 1:]

    with tf.GradientTape() as tape:
        predictions, _ = transformer([inp, decoder_input], training=True)
        loss = loss_function(expected, predictions)

    weights = transformer.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(loss, weights), weights))

    train_loss(loss)
    train_accuracy(accuracy_function(expected, predictions))

In [None]:
# Train for EPOCHS passes over the dataset, printing the mean loss and
# accuracy of each epoch.
for epoch in range(EPOCHS):
    # Start every epoch with fresh running averages. `reset_state()` is the
    # current Keras metric API; `reset_states()` is its deprecated alias.
    train_loss.reset_state()
    train_accuracy.reset_state()

    for inputs in train:
        # Questions and answers are tokenised with the same vectorizer.
        train_step(vectorizer(inputs["question"]), vectorizer(inputs["answer"]))

    print(f'Epoch {epoch + 1} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')


In [None]:
# Create the output directory from Python instead of shelling out: this is
# platform-independent and does not fail when the directory already exists,
# so the cell can be re-run safely.
os.makedirs("tmp", exist_ok=True)
# The directory name encodes the architecture hyperparameters of the model.
transformer.save(f'tmp/transformer_{num_layers}_{model_dim}_{dff}_{num_heads}')

In [None]:
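# Optionally reload a previously saved transformer instead of retraining.
# The path must match the directory written by the save cell above.
#transformer = tf.keras.models.load_model(f'tmp/transformer_{num_layers}_{model_dim}_{dff}_{num_heads}')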

In [None]:

# Wrap the trained transformer for inference; the same vectorizer instance is
# passed for the first two arguments.
# NOTE(review): presumably these are the input and target vectorizers and the
# final 10 is the beam width — confirm against TranslatorWithBeamSearch.
translator = TranslatorWithBeamSearch(vectorizer,vectorizer,transformer,10)


In [None]:
# Try the translator on a single example sentence.
sentence = tf.constant(["how are you doing."])
# NOTE(review): the second argument (20) is presumably the maximum number of
# tokens to generate — confirm against the translator's call signature.
output=translator(sentence,20)
# This returns the best sentence.
output[1]