In [1]:
import models
from loader import pipeline
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from models.transformer.transformer import Transformer
from models.custom_metrics.metrics import loss_function, accuracy_function
import tensorflow_addons as tfa
from translator.translator import TranslatorWithVectorizer

In [2]:
data_path = "dataset\set_2\dialogs.txt"
BATCH_SIZE = 128

In [3]:
train = pipeline.create_dataset(data_path,BATCH_SIZE = 128)
vectorizer = TextVectorization(max_tokens=5000,standardize=pipeline.add_start_and_end_tokens)
vectorizer.adapt(train.map(lambda x: x["question"]))
vocab = vectorizer.get_vocabulary()

In [5]:
num_layers = 2
model_dim = 128
dff = 256
num_heads = 4
dropout_rate = 0.1
vocab_len = len(vocab)
INIT_LR = 1e-4
MAX_LR = 1e-2
MAX_TOKENS = 128
EPOCHS = 60

In [6]:
transformer = Transformer(
    num_layers=num_layers,
    model_dim=model_dim,
    num_heads=num_heads,
    dff=dff,
    input_vocab_size=vocab_len,
    target_vocab_size=vocab_len,
    dropout_rate=dropout_rate,
    max_tokens=MAX_TOKENS)

In [7]:

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')

In [8]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, model_dim, warmup_steps=1000):
        super(CustomSchedule, self).__init__()

        self.model_dim = model_dim
        self.model_dim = tf.cast(self.model_dim, tf.float32)

        self.warmup_steps = warmup_steps
    def __call__(self, step):
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.model_dim) * tf.math.minimum(arg1, arg2)


learning_rate = CustomSchedule(model_dim)

optimizer = tf.keras.optimizers.Adam(
                            learning_rate, 
                            beta_1=0.9, 
                            beta_2=0.98,
                            epsilon=1e-9)

In [9]:


train_step_signature = [
tf.TensorSpec(shape=(None, None), dtype=tf.int64),
tf.TensorSpec(shape=(None, None), dtype=tf.int64),
]


@tf.function(input_signature=train_step_signature)
def train_step(inp, tar):
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]

    with tf.GradientTape() as tape:
        predictions, _ = transformer([inp, tar_inp],
                    training = True)
        loss = loss_function(tar_real, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

    train_loss(loss)
    train_accuracy(accuracy_function(tar_real, predictions))

In [10]:
for epoch in range(EPOCHS):


    train_loss.reset_states()
    train_accuracy.reset_states()

    for batch, inputs in enumerate(train):
        train_step(vectorizer(inputs["question"]), vectorizer(inputs["answer"]))


    print(f'Epoch {epoch + 1} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')


Epoch 1 Loss 7.7400 Accuracy 0.0469
Epoch 2 Loss 7.1912 Accuracy 0.1337
Epoch 3 Loss 6.6020 Accuracy 0.1333
Epoch 4 Loss 5.9891 Accuracy 0.1346
Epoch 5 Loss 5.6093 Accuracy 0.1551
Epoch 6 Loss 5.4209 Accuracy 0.1665
Epoch 7 Loss 5.2014 Accuracy 0.1825
Epoch 8 Loss 4.9267 Accuracy 0.2057
Epoch 9 Loss 4.6923 Accuracy 0.2214
Epoch 10 Loss 4.4753 Accuracy 0.2395
Epoch 11 Loss 4.2610 Accuracy 0.2575
Epoch 12 Loss 4.0575 Accuracy 0.2751
Epoch 13 Loss 3.8435 Accuracy 0.2983
Epoch 14 Loss 3.6621 Accuracy 0.3150
Epoch 15 Loss 3.4594 Accuracy 0.3376
Epoch 16 Loss 3.2567 Accuracy 0.3629
Epoch 17 Loss 3.0589 Accuracy 0.3881
Epoch 18 Loss 2.8582 Accuracy 0.4105
Epoch 19 Loss 2.6631 Accuracy 0.4384
Epoch 20 Loss 2.4702 Accuracy 0.4681
Epoch 21 Loss 2.2767 Accuracy 0.4966
Epoch 22 Loss 2.0859 Accuracy 0.5318
Epoch 23 Loss 1.9130 Accuracy 0.5623
Epoch 24 Loss 1.7461 Accuracy 0.5904
Epoch 25 Loss 1.5880 Accuracy 0.6216
Epoch 26 Loss 1.4229 Accuracy 0.6560
Epoch 27 Loss 1.3472 Accuracy 0.6652
Epoch 28 L

In [11]:
!mkdir -p tmp
transformer.save('tmp/transformer')

A subdirectory or file -p already exists.
Error occurred while processing: -p.






INFO:tensorflow:Assets written to: tmp/transformer\assets


INFO:tensorflow:Assets written to: tmp/transformer\assets


In [12]:
translator = TranslatorWithVectorizer(vectorizer,transformer)

need longer training 

In [31]:
test = tf.constant(["what did you do?"])
answer,_,_ = translator(test,128)
answer

'[START] i got a good nose for that\x97cigarettes stink [END] '

In [33]:
test = tf.constant(["where are the band-aids?"])
answer,_,_ = translator(test,128)
answer

'[START] i got a good spot [END] '