In [1]:
from loader import pipeline
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from models.transformer.transformer import Transformer
from models.custom_metrics.metrics import loss_function, accuracy_function
from translator.translator import TranslatorWithBeamSearch

In [2]:
# Location of the dialog corpus. A raw string keeps the Windows-style
# backslashes literal; without it, "\s" and "\d" are invalid escape sequences
# that only work by accident and trigger a warning on modern Python.
data_path = r"dataset\set_2\dialogs.txt"
# Batch size for the training dataset.
BATCH_SIZE = 128

In [3]:
# Build the training dataset. The shared BATCH_SIZE constant is passed through
# instead of repeating the literal, so the two values cannot drift apart.
train = pipeline.create_dataset(data_path, BATCH_SIZE=BATCH_SIZE)

# Text vectorizer capped at 5000 vocabulary entries; standardization is
# delegated to the project's add_start_and_end_tokens helper.
vectorizer = TextVectorization(
    max_tokens=5000,
    standardize=pipeline.add_start_and_end_tokens,
)

# NOTE(review): the vocabulary is learned from the "question" field only, yet
# this same vectorizer is also applied to the "answer" field later in the
# notebook. Confirm that answer-only words mapping to [UNK] is acceptable.
vectorizer.adapt(train.map(lambda x: x["question"]))
vocab = vectorizer.get_vocabulary()

In [4]:
# --- Model architecture (forwarded to the Transformer constructor) ---
num_layers = 2
num_heads = 4
model_dim = 64
dff = 128
dropout_rate = 0.2

# --- Vocabulary / sequence sizes ---
vocab_len = len(vocab)  # size of the vocabulary returned by the vectorizer
MAX_TOKENS = 32

# --- Training ---
EPOCHS = 100
# NOTE(review): INIT_LR and MAX_LR are not referenced by any cell visible in
# this notebook (the optimizer uses CustomSchedule instead) — confirm whether
# they are still needed.
INIT_LR = 1e-4
MAX_LR = 1e-2

In [5]:
# Instantiate the model from the hyperparameter cell. The same vocabulary size
# is used for both the input and the target side.
transformer = Transformer(
    num_layers=num_layers,
    num_heads=num_heads,
    model_dim=model_dim,
    dff=dff,
    dropout_rate=dropout_rate,
    max_tokens=MAX_TOKENS,
    input_vocab_size=vocab_len,
    target_vocab_size=vocab_len,
)

In [6]:
# Running means of the per-batch loss and accuracy; both are updated inside
# train_step and reset at the start of every epoch.
train_loss, train_accuracy = (
    tf.keras.metrics.Mean(name=metric_name)
    for metric_name in ('train_loss', 'train_accuracy')
)

In [7]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Learning-rate schedule with a linear ramp followed by inverse-sqrt decay.

    For a given ``step`` the rate is::

        rsqrt(model_dim) * min(rsqrt(step), step * warmup_steps ** -1.5)

    The two arguments of ``min`` cross at ``step == warmup_steps``, which is
    where the rate reaches its maximum.

    Args:
        model_dim: Model dimensionality used to scale the rate.
        warmup_steps: Number of steps over which the rate ramps up.
    """

    def __init__(self, model_dim, warmup_steps=1000):
        super().__init__()

        # Keep the caller's original value so get_config() can return
        # something serializable; __call__ uses the float32 tensor below.
        self._model_dim_config = model_dim
        self.model_dim = tf.cast(model_dim, tf.float32)

        self.warmup_steps = warmup_steps

    def __call__(self, step):
        """Return the learning rate for ``step`` as a float32 tensor."""
        # Optimizers may pass an integer step counter; rsqrt and the
        # multiplication below require a floating-point tensor.
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.model_dim) * tf.math.minimum(arg1, arg2)

    def get_config(self):
        """Return the constructor arguments so the schedule can be serialized."""
        return {
            "model_dim": self._model_dim_config,
            "warmup_steps": self.warmup_steps,
        }


# Schedule built from the model dimension (default warmup of 1000 steps),
# then handed to Adam as its learning rate.
learning_rate = CustomSchedule(model_dim)

optimizer = tf.keras.optimizers.Adam(
    learning_rate=learning_rate,
    beta_1=0.9,
    beta_2=0.98,
    epsilon=1e-9,
)

In [8]:
# Both arguments of train_step are rank-2 int64 tensors whose dimensions are
# left unspecified, so one traced graph can serve batches of different shapes.
train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int64)
    for _ in range(2)
]


@tf.function(input_signature=train_step_signature)
def train_step(inp, tar):
    """Run one optimisation step on a batch and update the running metrics.

    Args:
        inp: int64 tensor of rank 2 holding the vectorized inputs.
        tar: int64 tensor of rank 2 holding the vectorized targets.

    Side effects:
        Applies gradients to ``transformer`` through ``optimizer`` and feeds
        the batch loss / accuracy into ``train_loss`` / ``train_accuracy``.
    """
    # Shift the target by one position: the model is fed everything but the
    # last token and is scored against everything but the first token.
    model_target_input, expected = tar[:, :-1], tar[:, 1:]

    with tf.GradientTape() as tape:
        predictions, _ = transformer([inp, model_target_input], training=True)
        loss = loss_function(expected, predictions)

    weights = transformer.trainable_variables
    grads = tape.gradient(loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

    train_loss(loss)
    train_accuracy(accuracy_function(expected, predictions))

In [9]:
for epoch in range(EPOCHS):
    # Start every epoch with fresh running averages. reset_state() is the
    # current API name; reset_states() is only a deprecated alias.
    train_loss.reset_state()
    train_accuracy.reset_state()

    # The batch index was never used, so iterate over the dataset directly.
    for inputs in train:
        train_step(vectorizer(inputs["question"]), vectorizer(inputs["answer"]))

    print(f'Epoch {epoch + 1} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')


Epoch 1 Loss 7.6936 Accuracy 0.0217
Epoch 2 Loss 6.9952 Accuracy 0.5117
Epoch 3 Loss 6.2475 Accuracy 0.5235
Epoch 4 Loss 5.2906 Accuracy 0.5337
Epoch 5 Loss 4.2283 Accuracy 0.5383
Epoch 6 Loss 3.5042 Accuracy 0.5289
Epoch 7 Loss 3.3994 Accuracy 0.4726
Epoch 8 Loss 5.6091 Accuracy 0.1331
Epoch 9 Loss 5.2025 Accuracy 0.1913
Epoch 10 Loss 4.9232 Accuracy 0.2182
Epoch 11 Loss 4.6334 Accuracy 0.2551
Epoch 12 Loss 4.4981 Accuracy 0.2653
Epoch 13 Loss 4.2874 Accuracy 0.2866
Epoch 14 Loss 4.2470 Accuracy 0.2817
Epoch 15 Loss 4.0993 Accuracy 0.2919
Epoch 16 Loss 3.9797 Accuracy 0.3000
Epoch 17 Loss 3.8588 Accuracy 0.3087
Epoch 18 Loss 3.7097 Accuracy 0.3222
Epoch 19 Loss 3.6103 Accuracy 0.3293
Epoch 20 Loss 3.4736 Accuracy 0.3472
Epoch 21 Loss 3.3481 Accuracy 0.3580
Epoch 22 Loss 3.2084 Accuracy 0.3699
Epoch 23 Loss 3.1188 Accuracy 0.3786
Epoch 24 Loss 2.9708 Accuracy 0.3971
Epoch 25 Loss 2.8620 Accuracy 0.4065
Epoch 26 Loss 2.7253 Accuracy 0.4250
Epoch 27 Loss 2.6455 Accuracy 0.4322
Epoch 28 L

In [10]:
import os

# Create the output directory from Python rather than the shell: this works on
# any OS and does nothing when the directory already exists, whereas
# `!mkdir tmp` reports an error on every re-run.
os.makedirs("tmp", exist_ok=True)

# The directory name encodes the architecture hyperparameters.
transformer.save(f'tmp/transformer_{num_layers}_{model_dim}_{dff}_{num_heads}')





INFO:tensorflow:Assets written to: tmp/transformer_2_64_128_4\assets


INFO:tensorflow:Assets written to: tmp/transformer_2_64_128_4\assets


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization

from loader import pipeline
from models.custom_metrics.metrics import loss_function, accuracy_function, loss_function_no_mask
from models.transformer.transformer import Transformer
from translator.translator import TranslatorWithBeamSearch

In [None]:
# Rebuild the dataset and vocabulary exactly as in the training section so the
# token ids match the ones the model was trained with.
# Raw string: keeps the backslashes literal without relying on the invalid
# "\s" / "\d" escape sequences.
data_path = r"dataset\set_2\dialogs.txt"
BATCH_SIZE = 128
# Pass the constant through instead of repeating the literal.
train = pipeline.create_dataset(data_path, BATCH_SIZE=BATCH_SIZE)
vectorizer = TextVectorization(
    max_tokens=5000,
    standardize=pipeline.add_start_and_end_tokens,
)
vectorizer.adapt(train.map(lambda x: x["question"]))
vocab = vectorizer.get_vocabulary()
# Load a previously saved transformer if needed. The save cell's log shows
# "tmp/transformer_2_64_128_4" as the directory it wrote to.
# NOTE(review): loading may require custom_objects for the project classes — confirm.
# transformer = tf.keras.models.load_model("tmp/transformer_2_64_128_4")

In [11]:

# The same vectorizer is supplied for both vectorizer arguments.
# NOTE(review): the trailing 10 is presumably the beam width — confirm against
# TranslatorWithBeamSearch's signature.
translator = TranslatorWithBeamSearch(
    vectorizer,
    vectorizer,
    transformer,
    10,
)


In [12]:
# A batch containing one input sentence.
sentence = tf.constant(["good luck with that."])

# NOTE(review): the second argument (30) is presumably the maximum output
# length — confirm against the translator's call signature.
output = translator(sentence, 30)

# Element 1 of the result is the best sentence; as the cell's last expression
# it is what gets displayed.
output[1]

"thank you very much for you very much i'll pass as news with me about [UNK] i'm very much very much see you very much see you very much"