In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so the
# project folder used by the following cells is reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Make the project folder on Drive the working directory. The relative paths
# used later (metrics.csv, my_model_28032023) resolve against it, and the local
# modules imported below (data_preprocessing, model) presumably live here too.
%cd /content/drive/MyDrive/TFG/Pruebas ejecucion

/content/drive/MyDrive/TFG/Pruebas ejecucion


In [None]:
# Pin the system cuDNN package to the 8.1.0.77 build for CUDA 11.2 (a downgrade
# of the held libcudnn8 package on this runtime).
!apt install --allow-change-held-packages libcudnn8=8.1.0.77-1+cuda11.2
# Replace the preinstalled TensorFlow stack with the pinned 2.11.0 release.
# %pip (instead of !pip) guarantees the packages are installed into the
# environment of the running kernel rather than whatever `pip` is on PATH.
%pip uninstall -y -q tensorflow keras tensorflow-estimator tensorflow-text
%pip install -q -U tensorflow-text==2.11.0 tensorflow==2.11.0

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following packages will be REMOVED:
  libcudnn8-dev
The following held packages will be changed:
  libcudnn8
The following packages will be DOWNGRADED:
  libcudnn8
0 upgraded, 0 newly installed, 1 downgraded, 1 to remove and 21 not upgraded.
Need to get 430 MB of archives.
After this operation, 1,153 MB disk space will be freed.
Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  libcudnn8 8.1.0.77-1+cuda11.2 [430 MB]
Fetched 430 MB in 5s (91.5 MB/s)
(Reading database ... 128285 files and directories currently installed.)
Removing libcudnn8-dev (8.7.0.84-1+cuda11.8) ...
update-alternatives: removing manually selected alternative - switching libcudnn to auto mode
(Reading database ... 128252 files and directories currently installed.)
Preparing to unpack .../libcudnn8_8.1.0.77-1+cuda11.2_amd64.deb ...
Unpacking libcudnn8 (8.1.0.77-1+cuda11.2) over (8.7.0.84-1

In [None]:
from data_preprocessing import create_datasets, text_vectorization, process_text
from model import Transformer, CustomSchedule, masked_loss, masked_acc

import tensorflow as tf

from tensorflow import keras
import numpy as np
import pandas as pd

In [None]:
# Sanity check: this should display the TensorFlow release pinned in the
# install cell above.
tf.__version__

'2.11.0'

In [None]:
# Load the raw train / validation / test splits and build the two text
# processors (context and target) from the training split only.
train, val, test = create_datasets()
context_text_processor, target_text_processor = text_vectorization(train)

# print(context_text_processor.get_vocabulary()[:10])
# print(target_text_processor.get_vocabulary()[:10])


def _vectorize_pair(context, target):
    """Run one (context, target) element through process_text with both processors."""
    return process_text(context, target, context_text_processor, target_text_processor)


# Every split is mapped from its OWN raw dataset. The test pipeline used to be
# built from `val`, which made test_ds a duplicate of val_ds and left the
# `test` split unused.
train_ds = train.map(_vectorize_pair, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val.map(_vectorize_pair, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test.map(_vectorize_pair, num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
def create_metrics(history, path='metrics.csv'):
    """Append the per-epoch metrics of a training run to a CSV log.

    Args:
        history: Object exposing a ``history`` mapping of metric name to a
            list of per-epoch values (as returned by ``fit`` in this notebook).
        path: CSV file to extend. Defaults to ``'metrics.csv'`` in the current
            working directory, which is what existing callers rely on.

    Raises:
        pandas.errors.ParserError, OSError: If an existing log is present but
            cannot be read. The file is left untouched in that case instead of
            being silently overwritten with only the newest run.
    """
    expected_columns = ['loss', 'masked_acc', 'val_loss', 'val_masked_acc']

    try:
        previous = pd.read_csv(path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No log yet (or a zero-byte file): start a fresh one.
        previous = None

    new_metrics = pd.DataFrame(history.history)

    if previous is None:
        # Keep the canonical column order first, followed by any additional
        # metrics the run reported; missing expected columns are written empty.
        extra_columns = [c for c in new_metrics.columns if c not in expected_columns]
        metrics = new_metrics.reindex(columns=expected_columns + extra_columns)
    else:
        metrics = pd.concat([previous, new_metrics], ignore_index=True)

    metrics.to_csv(path, index=False)

In [None]:
#! Model
# Hyperparameters forwarded by name to the Transformer constructor in the next
# cell. d_model is additionally the argument of CustomSchedule, so changing it
# also changes the learning-rate schedule.
num_layers = 4
d_model = 128
dff = 512
num_heads = 8
dropout_rate = 0.1

In [None]:
# Collect the constructor arguments first; the vocabulary sizes are read from
# the text processors so the model matches the tokenizers it is trained with.
transformer_config = dict(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    input_vocab_size=context_text_processor.vocabulary_size(),
    target_vocab_size=target_text_processor.vocabulary_size(),
    dropout_rate=dropout_rate,
)
transformer = Transformer(**transformer_config)

In [None]:
#! Loss and Optimizer
# Adam driven by the d_model-based CustomSchedule instead of a fixed rate.
learning_rate = CustomSchedule(d_model)
my_optimizer = tf.keras.optimizers.Adam(
    learning_rate=learning_rate,
    beta_1=0.9,
    beta_2=0.98,
    epsilon=1e-9,
)

In [None]:
#! Compile
# masked_loss and masked_acc are the project's own loss/metric from `model`.
transformer.compile(optimizer=my_optimizer, loss=masked_loss, metrics=[masked_acc])

In [None]:
# First training run (10 epochs); the returned object is what create_metrics
# reads its per-epoch values from.
history = transformer.fit(train_ds, validation_data=val_ds, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Append the metrics of the training run above to metrics.csv.
create_metrics(history)

In [None]:
# import io
# import json

In [None]:
# context_tokenizer_json = context_text_processor.
# with io.open('context_tokenizer.json', 'w', encoding='utf-8') as f:
#     f.write(json.dumps(context_tokenizer_json, ensure_ascii=False))

In [None]:
# Save model
# The path is relative to the current working directory; the next cell loads
# the model back from the same location.
transformer.save('my_model_28032023')

  return serialization.serialize_keras_object(obj)


In [None]:
# Reload the saved model. The project-defined schedule, loss and metric are
# passed by name so Keras can resolve them while deserializing.
reconstructed_model = keras.models.load_model(
    "my_model_28032023",
    custom_objects={
        'CustomSchedule': CustomSchedule,
        'masked_loss': masked_loss,
        'masked_acc': masked_acc,
    },
)

In [None]:
# np.testing.assert_allclose(
#     transformer.predict(val_ds), reconstructed_model.predict(val_ds)
# )

In [None]:
# Continue training the reloaded model for 5 more epochs, then append the new
# per-epoch metrics to the CSV log.
history = reconstructed_model.fit(train_ds, validation_data=val_ds, epochs=5)
create_metrics(history)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
def print_translation(sentence, translated_sentence, ground_truth):
  """Print the input, the model prediction and the reference on aligned rows.

  Args:
    sentence: Source sentence that was translated.
    translated_sentence: Text produced by the model.
    ground_truth: Reference translation to compare against.
  """
  rows = (
      # The label used to be "Input:", which rendered as "Input:         :"
      # (double colon) and did not match the other two rows.
      ('Input', sentence),
      ('Prediction', translated_sentence),
      ('Ground truth', ground_truth),
  )
  for label, text in rows:
    # Labels are left-aligned and padded to 15 characters so the colons line up.
    print(f'{label:15s}: {text}')

In [None]:
def detokenize_sentence(processor, sentence):
  """Turn a sequence of token ids back into a plain-text sentence.

  Args:
    processor: Object whose ``get_vocabulary()`` returns a list of strings
      indexed by token id.
    sentence: Iterable of integer-like token ids (Python ints, NumPy integers
      or scalar eager tensors).

  Returns:
    The vocabulary entries joined by single spaces, with the '[START]' and
    '[END]' markers removed and surrounding whitespace stripped.
  """
  vocab = processor.get_vocabulary()

  # Look the ids up in plain Python and join once, instead of issuing one
  # tf.strings.join op per token and decoding the resulting bytes afterwards.
  words = [vocab[int(token)] for token in sentence]
  text = ' '.join(words)

  return text.replace('[START]', '').replace('[END]', '').strip()

In [None]:
def translate(sentence, context_processor, target_processor, model, max_tokens=128):
  """Translate `sentence` by decoding one token at a time with `model`.

  At every step the token with the highest score (tf.argmax) is appended to
  the decoder input, until the end token is produced or `max_tokens` steps
  have run.

  Args:
    sentence: Text to translate; it is wrapped with tf.constant and a scalar
      is expanded to a batch of one.
    context_processor: Callable that tokenizes the input; its result must
      offer `.to_tensor()`.
    target_processor: Callable used to obtain the start/end token ids and,
      through detokenize_sentence, the output vocabulary.
    model: Callable taking `(encoder_input, decoder_input)` and `training`.
    max_tokens: Upper bound on the number of decoding steps.

  Returns:
    The detokenized text of the first sequence in the batch.
  """
  # Convert the sentence to tensor
  sentence = tf.constant(sentence)
  assert isinstance(sentence, tf.Tensor)

  # Add a leading batch axis when a single (scalar) string was passed
  if len(sentence.shape) == 0:
    sentence = sentence[tf.newaxis]

  # Tokenize the sentence
  sentence = context_processor(sentence).to_tensor()

  # Input for the encoder
  encoder_input = sentence

  # Input for the decoder: processing an empty string and taking the first and
  # last ids yields the start and end tokens (presumably the [START]/[END]
  # markers added by the processor - confirm in data_preprocessing)
  start_end_tokens = target_processor([''])[0]
  start_token = start_end_tokens[0][tf.newaxis]
  end_token = start_end_tokens[-1][tf.newaxis]

  # Accumulate the decoder tokens in a dynamically sized TensorArray
  output_tensor = tf.TensorArray(dtype=tf.int64, size=0, dynamic_size=True)
  output_tensor = output_tensor.write(0, start_token)

  # seq_to_seq generation
  for i in tf.range(max_tokens):
    # Stack the tokens written so far and transpose them for the model
    output = tf.transpose(output_tensor.stack())

    # Get the model predictions
    predictions = model((encoder_input, output), training=False)

    # Select the last token from the seq_len dimension
    predictions = predictions[:, -1, :] # The middle axis is dropped: (batch_size, vocab_size), assuming the model returns (batch_size, seq_len, vocab_size)
    # Get the most probable next token
    predicted_id = tf.argmax(predictions, axis=-1)

    # Append the predicted token to the output sentence
    output_tensor = output_tensor.write(i+1, predicted_id) #TODO: the tutorial uses predicted_id[0] here

    # Stop as soon as the end token has been generated
    # NOTE(review): this comparison is used as a Python bool, so it presumably
    # only works for a batch of one sentence - confirm before batching.
    if predicted_id == end_token:
      break

  # Convert the output to the correct shape
  output = tf.transpose(output_tensor.stack()) # Shape (1, tokens_generated)

  # Detokenize the output sentence (only the first row of the batch)
  text = detokenize_sentence(target_processor, output[0])
  return text

In [None]:
# Short everyday sentence translated with the reloaded model.
sentence = 'I want to be at home today'
ground_truth = 'Quiero estar en casa hoy'

translated_text = translate(
    sentence,
    context_processor=context_text_processor,
    target_processor=target_text_processor,
    model=reconstructed_model,
)
print_translation(sentence, translated_text, ground_truth)

Input:         : I want to be at home today
Prediction     : quiero estar en casa hoy
Ground truth   : Quiero estar en casa hoy


In [None]:
# Second short sentence translated with the reloaded model.
sentence = 'I like to drink water in the park'
ground_truth = 'Me gusta beber agua en el parque'

translated_text = translate(
    sentence,
    context_processor=context_text_processor,
    target_processor=target_text_processor,
    model=reconstructed_model,
)
print_translation(sentence, translated_text, ground_truth)

Input:         : I like to drink water in the park
Prediction     : me gusta beber agua en el parque
Ground truth   : Me gusta beber agua en el parque


In [None]:
# Longer sentence translated with the reloaded model. The literals are split
# across lines only for readability; adjacent string literals are concatenated
# into the same single string.
sentence = (
    'university has taught me that life is not only about technical knowledge, '
    'but also about meeting people who can help you in the future.'
)
ground_truth = (
    'la universidad me ha enseñado que en la vida no solo importan los '
    'conocimientos técnicos, sino también conocer personas que te puedan '
    'ayudar en el futuro'
)

translated_text = translate(
    sentence,
    context_processor=context_text_processor,
    target_processor=target_text_processor,
    model=reconstructed_model,
)
print_translation(sentence, translated_text, ground_truth)

Input:         : university has taught me that life is not only about technical knowledge, but also about meeting people who can help you in the future.
Prediction     : la universidad me ha [UNK] que la vida no solo sobre conocimiento tecnica sino tambien sobre las personas que pueden [UNK] en el futuro .
Ground truth   : la universidad me ha enseñado que en la vida no solo importan los conocimientos técnicos, sino también conocer personas que te puedan ayudar en el futuro
