In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that files
# stored there can be reached with ordinary paths.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Change the working directory to the project folder on the mounted Drive.
# Presumably this is what makes the local `data_preprocessing` and `model`
# modules importable and the relative save path resolve there -- verify.
%cd /content/drive/MyDrive/TFG/Pruebas ejecucion

In [None]:
# Environment setup, executed as shell commands through IPython's `!` escape.
# 1) Install the libcudnn8 system package at version 8.1.0.77-1+cuda11.2,
#    allowing apt to change held packages.
!apt install --allow-change-held-packages libcudnn8=8.1.0.77-1+cuda11.2
# 2) Remove any installed TensorFlow/Keras packages (quietly, without prompting).
!pip uninstall -y -q tensorflow keras tensorflow-estimator tensorflow-text
# 3) Install/upgrade tensorflow-text and tensorflow. No versions are pinned here,
#    so a later rerun may resolve to different releases.
!pip install -q -U tensorflow-text tensorflow

In [None]:
from data_preprocessing import create_datasets, text_vectorization, process_text
from model import Transformer

import tensorflow as tf

from tensorflow import keras
import numpy as np

In [None]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Learning-rate schedule with linear warmup followed by inverse-sqrt decay.

    For a given step the returned rate is::

        rsqrt(d_model) * min(rsqrt(step), step * warmup_steps ** -1.5)

    The second branch grows linearly with ``step`` and is the smaller one while
    ``step < warmup_steps``; afterwards the ``rsqrt(step)`` branch takes over.

    Args:
        d_model: Value used to scale the rate (its reciprocal square root
            multiplies the result). Must be convertible to ``int``.
        warmup_steps: Step at which the two branches are equal.
    """

    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()

        # Plain-Python copy used only by get_config(). `self.d_model` below is
        # a float32 tensor, which the original code noted could not be returned
        # from get_config() without raising an error.
        self._d_model_config = int(d_model)
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def get_config(self):
        """Return the constructor arguments as a dict of plain Python values.

        Reads only instance state, so it does not depend on any notebook-level
        global being defined when the schedule is serialized.
        """
        return {
            'd_model': self._d_model_config,
            'warmup_steps': self.warmup_steps,
        }

    def __call__(self, step):
        """Compute the learning rate for ``step`` (cast to float32 first)."""
        step = tf.cast(step, dtype=tf.float32)
        decay_branch = tf.math.rsqrt(step)
        warmup_branch = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(decay_branch, warmup_branch)

In [None]:
def masked_loss(label, pred):
    """Sparse categorical cross-entropy averaged over non-zero labels only.

    Args:
        label: Integer class ids; positions equal to 0 are excluded from the
            average (presumably 0 is the padding id -- confirm against the
            vectorizer).
        pred: Model outputs, interpreted as logits (``from_logits=True``).

    Returns:
        Sum of the per-position losses at non-zero labels divided by the number
        of non-zero labels.
    """
    # reduction='none' keeps one loss value per position so it can be masked.
    per_position = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True,
        reduction='none',
    )(label, pred)

    keep = tf.cast(label != 0, dtype=per_position.dtype)
    kept_total = tf.reduce_sum(per_position * keep)

    return kept_total / tf.reduce_sum(keep)

In [None]:
def masked_acc(label, pred):
    """Fraction of non-zero-label positions whose argmax prediction is correct.

    Args:
        label: Integer class ids; positions equal to 0 are ignored.
        pred: Scores whose axis 2 indexes the classes (argmax is taken there).

    Returns:
        Number of correct predictions at non-zero labels divided by the number
        of non-zero labels, as float32.
    """
    predicted_ids = tf.argmax(pred, axis=2)
    # Bring the labels to the argmax dtype so the equality comparison is valid.
    label = tf.cast(label, predicted_ids.dtype)

    keep = label != 0
    hits = (label == predicted_ids) & keep

    n_hits = tf.reduce_sum(tf.cast(hits, dtype=tf.float32))
    n_kept = tf.reduce_sum(tf.cast(keep, dtype=tf.float32))
    return n_hits / n_kept

In [None]:
# Build the three raw splits and one (context, target) pair of text processors
# per split.
train, val, test = create_datasets()
context_text_processor_train, target_text_processor_train = text_vectorization(train)
context_text_processor_val, target_text_processor_val = text_vectorization(val)
context_text_processor_test, target_text_processor_test = text_vectorization(test)

# NOTE(review): the model's vocabulary sizes are taken from the *train*
# processors, yet val/test are tokenized with processors built from their own
# split. If text_vectorization() adapts a vocabulary to the data it receives,
# token ids will not line up across splits and validation/test metrics will be
# unreliable -- confirm, and consider reusing the train processors everywhere.

# print(context_text_processor_train.get_vocabulary()[:10])
# print(target_text_processor_train.get_vocabulary()[:10])

# Map every split through process_text with its processors.
train_ds = train.map(
    lambda x, y: process_text(x, y, context_text_processor_train, target_text_processor_train),
    tf.data.AUTOTUNE,
)
val_ds = val.map(
    lambda x, y: process_text(x, y, context_text_processor_val, target_text_processor_val),
    tf.data.AUTOTUNE,
)
# Fixed: this previously mapped over `val`, so the test split was never used.
test_ds = test.map(
    lambda x, y: process_text(x, y, context_text_processor_test, target_text_processor_test),
    tf.data.AUTOTUNE,
)

In [None]:
#! Model
# Hyperparameters forwarded to the Transformer constructor; d_model is also
# passed to the learning-rate schedule.
d_model = 128        # Transformer(d_model=...) and CustomSchedule(d_model)
num_heads = 8        # Transformer(num_heads=...)
num_layers = 4       # Transformer(num_layers=...)
dff = 512            # Transformer(dff=...)
dropout_rate = 0.1   # Transformer(dropout_rate=...)

In [None]:
# Instantiate the model. Both vocabulary sizes are read from the processors
# built on the training split.
transformer = Transformer(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    input_vocab_size=context_text_processor_train.vocabulary_size(),
    target_vocab_size=target_text_processor_train.vocabulary_size(),
    dropout_rate=dropout_rate,
)

In [None]:
#! Loss and Optimizer
# Adam whose learning rate follows the warmup schedule defined by CustomSchedule.
learning_rate = CustomSchedule(d_model)
my_optimizer = tf.keras.optimizers.Adam(
    learning_rate,
    beta_1=0.9,
    beta_2=0.98,
    epsilon=1e-9,
)

In [None]:
#! Compile
# Attach the masked loss/accuracy (both ignore label 0) and the Adam optimizer.
transformer.compile(
    loss=masked_loss,
    optimizer=my_optimizer,
    metrics=[masked_acc],
)

In [None]:
# Train for 10 epochs on the training split, evaluating on the validation
# split after each epoch.
transformer.fit(train_ds, epochs=10, validation_data=val_ds)

In [None]:
# Persist the trained model under the relative path 'my_model_16032023'
# (resolved against the current working directory).
transformer.save('my_model_16032023')

In [None]:
# reconstructed_model = keras.models.load_model("my_model_16032023", custom_objects={ 'CustomSchedule' : CustomSchedule, 'masked_loss': masked_loss, 'masked_acc': masked_acc })

In [None]:
# np.testing.assert_allclose(
#     transformer.predict(val_ds), reconstructed_model.predict(val_ds)
# )

In [None]:
# reconstructed_model.fit(
#     test_ds,
#     epochs = 3,
#     validation_data = val_ds,
# )