In [1]:
# List the working directory; the listing includes tf_transformer.py, the local module imported further down.
!ls

README.md
Transformer_Architecture.ipynb
Transformer_training.ipynb
__pycache__
test.py
tf_transformer.py


In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [7]:
# Load the TED Portuguese -> English translation corpus.
# as_supervised=True makes every element a (pt, en) pair, which is how the
# later cells unpack it; with_info=True additionally returns dataset metadata.
# NOTE(review): `xamples` looks like a typo for `examples`; the name is kept
# so that any cell referring to it keeps working.
xamples, metadata = tfds.load(
    'ted_hrlr_translate/pt_to_en',
    as_supervised=True,
    with_info=True,
)
train_examples = xamples['train']
val_examples = xamples['validation']



In [8]:
# Learn one subword vocabulary per language from the training split only.
# Each generator walks the (pt, en) pairs and yields the raw bytes of one side.
# NOTE(review): target_vocab_size appears to be a target rather than an exact
# size (the batch preview shows start ids 8214 / 8087, not 8192) - confirm.
tokenizer_en = tfds.features.text.SubwordTextEncoder.build_from_corpus(
    (en.numpy() for _, en in train_examples),
    target_vocab_size=2**13,
)

tokenizer_pt = tfds.features.text.SubwordTextEncoder.build_from_corpus(
    (pt.numpy() for pt, _ in train_examples),
    target_vocab_size=2**13,
)

In [9]:
# Round-trip sanity check: encoding and then decoding with the English
# tokenizer must give back the exact input string.
sample_string = 'Transformer is awesome.'

tokenized_string = tokenizer_en.encode(sample_string)
original_string = tokenizer_en.decode(tokenized_string)

print('Tokenized string is {}'.format(tokenized_string))
print('The original string: {}'.format(original_string))

assert original_string == sample_string

Tokenized string is [7915, 1248, 7946, 7194, 13, 2799, 7877]
The original string: Transformer is awesome.


In [112]:
# Size of the shuffle buffer used when building the training pipeline.
BUFFER_SIZE = 20000
# Number of sentence pairs per padded batch (training and validation).
BATCH_SIZE = 64
# Upper bound on the number of passes over the training set in the training loop.
EPOCHS = 100

In [16]:
def encode(lang1, lang2):
    """Tokenize one sentence pair and wrap each side in start/end ids.

    The two ids just past each tokenizer's vocabulary are used as markers:
    `vocab_size` is prepended and `vocab_size + 1` is appended.

    Args:
        lang1: Portuguese sentence; must expose `.numpy()` (it is tokenized
            with `tokenizer_pt`).
        lang2: English sentence; must expose `.numpy()` (it is tokenized
            with `tokenizer_en`).

    Returns:
        A `(lang1_ids, lang2_ids)` pair of token-id sequences, each
        including its start and end marker.
    """
    pt_start = [tokenizer_pt.vocab_size]
    pt_body = tokenizer_pt.encode(lang1.numpy())
    pt_end = [tokenizer_pt.vocab_size + 1]
    lang1 = pt_start + pt_body + pt_end

    en_start = [tokenizer_en.vocab_size]
    en_body = tokenizer_en.encode(lang2.numpy())
    en_end = [tokenizer_en.vocab_size + 1]
    lang2 = en_start + en_body + en_end

    return lang1, lang2

In [17]:
# Longest tokenized sequence kept; used as the default cutoff in filter_max_length.
MAX_LENGTH = 40

In [18]:
def filter_max_length(x, y, max_length=MAX_LENGTH):
    """Dataset predicate: keep a pair only if both sides are short enough.

    Args:
        x: Token ids of the first sequence.
        y: Token ids of the second sequence.
        max_length: Maximum allowed element count for each sequence
            (inclusive). Defaults to `MAX_LENGTH`.

    Returns:
        A boolean tensor that is true when `tf.size(x)` and `tf.size(y)`
        are both at most `max_length`.
    """
    x_fits = tf.size(x) <= max_length
    y_fits = tf.size(y) <= max_length
    return tf.logical_and(x_fits, y_fits)

In [19]:
def tf_encode(pt, en):
    """Wrap the eager-only `encode` so it can be used with `Dataset.map`.

    `encode` calls `.numpy()` on its inputs, so it is routed through
    `tf.py_function`, which runs it eagerly on each element.

    Args:
        pt: Scalar string tensor holding the Portuguese sentence.
        en: Scalar string tensor holding the English sentence.

    Returns:
        A `(pt_ids, en_ids)` pair of int64 tensors, each declared as a
        1-D tensor of unknown length.
    """
    result_pt, result_en = tf.py_function(
        encode, [pt, en], [tf.int64, tf.int64])
    # Tensors coming out of py_function carry no static shape. Declaring them
    # rank-1 lets downstream ops that need a known rank (e.g. padded batching
    # with inferred shapes) work on the mapped dataset.
    result_pt.set_shape([None])
    result_en.set_shape([None])
    return result_pt, result_en

In [20]:
# Training pipeline: tokenize -> drop over-long pairs -> cache -> shuffle ->
# pad and batch -> prefetch.
train_dataset = (
    train_examples
    .map(tf_encode)
    .filter(filter_max_length)
    # cache the dataset to memory to get a speedup while reading from it.
    .cache()
    .shuffle(BUFFER_SIZE)
    # [-1] pads each side to the longest sequence in its batch.
    .padded_batch(BATCH_SIZE, padded_shapes=([-1], [-1]))
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Validation pipeline: same tokenizing, filtering and batching, but without
# caching, shuffling or prefetching.
val_dataset = (
    val_examples
    .map(tf_encode)
    .filter(filter_max_length)
    .padded_batch(BATCH_SIZE, padded_shapes=([-1], [-1]))
)

In [78]:
# Pull one batch from the training pipeline to inspect the padded id tensors.
pt_batch, en_batch = next(iter(train_dataset))
# Bare tuple as the last expression so the notebook displays both tensors.
pt_batch, en_batch

(<tf.Tensor: id=546744, shape=(64, 40), dtype=int64, numpy=
 array([[8214,   67,   92, ...,    0,    0,    0],
        [8214,   11,  560, ...,    0,    0,    0],
        [8214,    6,   42, ...,    0,    0,    0],
        ...,
        [8214,   23,   88, ...,    0,    0,    0],
        [8214, 5079,   12, ...,    0,    0,    0],
        [8214,   42,   13, ...,    0,    0,    0]], dtype=int64)>,
 <tf.Tensor: id=546745, shape=(64, 40), dtype=int64, numpy=
 array([[8087,   94,  119, ...,    0,    0,    0],
        [8087,    5, 7799, ...,    0,    0,    0],
        [8087,    4, 1644, ...,    0,    0,    0],
        ...,
        [8087,   18,   83, ...,    0,    0,    0],
        [8087,  190,    8, ...,    0,    0,    0],
        [8087,  111,    1, ...,    0,    0,    0]], dtype=int64)>)

In [46]:
import importlib
import tf_transformer

Loading New Transformer Models


In [113]:
# HYPERPARAMETERS
# Reload the local module first so edits to tf_transformer.py take effect
# without restarting the kernel.
tf_transformer = importlib.reload(tf_transformer)

# Architecture sizes.
num_layers = 4
d_model = 256
dff = 1024
num_heads = 8
dropout_rate = 0.1

# Each vocabulary is extended by two ids: the start and end markers that
# `encode` adds at vocab_size and vocab_size + 1.
input_vocab_size = tokenizer_pt.vocab_size + 2
target_vocab_size = tokenizer_en.vocab_size + 2

model = tf_transformer.Transformer(
    input_vocab_size,
    target_vocab_size,
    d_model=d_model,
    num_heads=num_heads,
    num_layers=num_layers,
    dff=dff,
    dropout=dropout_rate,
)

Loading Transformer Module


In [None]:
# Per-epoch loss history. Both lists hold one entry per completed epoch, so
# they remain usable (e.g. for plotting) even if training is interrupted.
epoch_losses = []
validation_losses = []
try:
    for epoch in range(EPOCHS):
        print('-- EPOCH ', epoch, ' --')
        # One pass over the training set.
        # NOTE(review): the return value is presumably a scalar loss for the
        # epoch - confirm against tf_transformer.Transformer.fit_on_tfds.
        epoch_loss = model.fit_on_tfds(train_dataset)
        epoch_losses.append(epoch_loss)

        # Same entry point with train=False for the validation split.
        validation_loss = model.fit_on_tfds(val_dataset, train=False)
        validation_losses.append(validation_loss)
        print('\tValidation loss: {0:.5f}'.format(validation_loss))

except KeyboardInterrupt:
    # Allow stopping the run manually (kernel interrupt) without a traceback;
    # the model and the histories collected so far are kept.
    print('Training interrupted!')

-- EPOCH  0  --
	Batch 703, Loss: 5.76744593

	Validation loss: 5.71678
-- EPOCH  1  --
	Batch 703, Loss: 4.97016723

	Validation loss: 4.89051
-- EPOCH  2  --
	Batch 703, Loss: 4.61345079

	Validation loss: 4.58050
-- EPOCH  3  --
	Batch 703, Loss: 4.43890161

	Validation loss: 4.36067
-- EPOCH  4  --
	Batch 703, Loss: 4.26140932

	Validation loss: 4.20199
-- EPOCH  5  --
	Batch 703, Loss: 4.10539548

	Validation loss: 4.04393
-- EPOCH  6  --
	Batch 703, Loss: 3.99264248

	Validation loss: 3.82296
-- EPOCH  7  --
	Batch 703, Loss: 3.70851607

	Validation loss: 3.63166
-- EPOCH  8  --
	Batch 703, Loss: 3.58518097

	Validation loss: 3.41145
-- EPOCH  9  --
	Batch 703, Loss: 3.45262768

	Validation loss: 3.21457
-- EPOCH  10  --
	Batch 703, Loss: 3.29081900

	Validation loss: 3.03516
-- EPOCH  11  --
	Batch 703, Loss: 3.16420695

	Validation loss: 2.88559
-- EPOCH  12  --
	Batch 703, Loss: 3.07432368

	Validation loss: 2.71122
-- EPOCH  13  --
	Batch 703, Loss: 2.94018526

	Validation lo

In [None]:
# Save the weights of the wrapped `model.model` object in TensorFlow checkpoint format.
# NOTE(review): the file name hard-codes "100_epochs", but the training loop
# can be interrupted early, so the name may not match the epochs actually run.
model.model.save_weights('./weights/transformer_100_epochs', save_format = 'tf')