Train the model

In [14]:
from keras_transformer import get_model, decode

import tensorflow as tf
import numpy as np
import joblib

In [9]:
# Load the preprocessed es-en arrays: encoder inputs, decoder inputs, decoder targets.
SAVE_PATH = "../../../../../../data/processed/translator-data/es-en/dataset.npz"

# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code -- only load archives from a trusted source.
# The context manager closes the archive's file handle once the arrays are read;
# each npz[...] lookup materialises the array in memory, so they stay usable afterwards.
with np.load(SAVE_PATH, allow_pickle=True) as npz:
    enc_inputs = npz['enc_inputs']
    dec_inputs = npz['dec_inputs']
    dec_outputs = npz['dec_outputs']

In [16]:
# Restore the serialized English and Spanish tokenizers.
# NOTE(review): joblib.load unpickles its input -- only load trusted files.
TOKENIZER_EN_PATH = '../../../../../../exports/translator/tokenizers/english/tokenizer.pkl'
TOKENIZER_ES_PATH = '../../../../../../exports/translator/tokenizers/spanish/tokenizer.pkl'

tokenizer_en, tokenizer_es = (
    joblib.load(path) for path in (TOKENIZER_EN_PATH, TOKENIZER_ES_PATH)
)

In [26]:
# Build and compile the model (training happens in a later cell)

# Hyperparameters
# D_MODEL was declared as 128 but never used: the model was built with a
# hard-coded embed_dim of 32. It is now the single source of truth and keeps
# the 32-dim embedding shown in the recorded summary output.
# NOTE(review): confirm 32 (not 128) is the intended width before retraining.
D_MODEL = 32 # 512
NB_LAYERS = 4 # 6
FFN_UNITS = 512 # 2048
NB_PROJ = 8
DROPOUT_RATE = 0.05
# +2 on top of each tokenizer's vocabulary -- presumably for start/end tokens; TODO confirm
VOCAB_SIZE_EN = tokenizer_en.vocab_size + 2
VOCAB_SIZE_ES = tokenizer_es.vocab_size + 2

transformer = get_model(
    # A single token count is passed for both sides, sized to the larger vocabulary
    token_num=max(VOCAB_SIZE_ES, VOCAB_SIZE_EN),
    embed_dim=D_MODEL,
    encoder_num=NB_LAYERS,
    decoder_num=NB_LAYERS,
    head_num=NB_PROJ,
    hidden_dim=FFN_UNITS,
    dropout_rate=DROPOUT_RATE,
    use_same_embed=False,
)
transformer.compile('adam', 'sparse_categorical_crossentropy')
transformer.summary()



Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 Encoder-Input (InputLayer)  [(None, None)]               0         []                            
                                                                                                  
 Encoder-Token-Embedding (E  [(None, None, 32),           268832    ['Encoder-Input[0][0]']       
 mbeddingRet)                 (8401, 32)]                                                         
                                                                                                  
 Encoder-Embedding (TrigPos  (None, None, 32)             0         ['Encoder-Token-Embedding[0][0
 Embedding)                                                         ]']                           
                                                                                            

In [27]:
# Directory used by the checkpoint manager
checkpoint_path = "../../../../../../checkpoints/translator/es-en/transformer"

# Track the model and keep at most two checkpoints
ckpt = tf.train.Checkpoint(transformer=transformer)
ckpt_manager = tf.train.CheckpointManager(
    checkpoint=ckpt,
    directory=checkpoint_path,
    max_to_keep=2,
)

# Resume from the most recent checkpoint when one is available
latest_checkpoint = ckpt_manager.latest_checkpoint
if latest_checkpoint:
    ckpt.restore(latest_checkpoint)
    print("Last checkpoint restored!!")

In [None]:
# Training
x = [enc_inputs, dec_inputs]

# Save a checkpoint at the end of every epoch so an interrupted run keeps its
# progress on disk instead of losing all epochs (previously the only save
# happened after fit() returned).
save_checkpoint = tf.keras.callbacks.LambdaCallback(
    on_epoch_end=lambda epoch, logs: ckpt_manager.save()
)

# No extra save after fit(): the callback has already saved the final epoch.
transformer.fit(
    x,
    dec_outputs,
    epochs=15,
    batch_size=32,
    callbacks=[save_checkpoint],
)