In [1]:
import warnings
# Globally ignore every warning category; this hides all warnings raised by later cells.
warnings.filterwarnings('ignore')

In [2]:
import os
# Set before TensorFlow is imported in the next cell.
# NOTE(review): presumably makes TensorFlow grow GPU memory on demand instead of
# reserving it all up front — confirm against the TensorFlow GPU guide.
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

Exercise 9.

In [4]:
import calendar

# The twelve month names; entry 0 of calendar.month_name is skipped by the [1:] slice.
MONTHS = calendar.month_name[1:]
MONTHS

['January',
 'February',
 'March',
 'April',
 'May',
 'June',
 'July',
 'August',
 'September',
 'October',
 'November',
 'December']

In [5]:
from datetime import date

def random_dates(n_dates):
    """Generate random dates as (human-readable, ISO-formatted) string pairs.

    Dates are drawn with ``np.random.randint`` (so they follow the global NumPy
    seed) between 1000-01-01 and 9999-12-31, both ends included.

    Args:
        n_dates: number of dates to generate.

    Returns:
        A tuple ``(x, y)`` of two lists with ``n_dates`` strings each:
        ``x`` holds inputs such as ``"September 20, 7075"`` and ``y`` the
        matching targets such as ``"7075-09-20"``.
    """
    min_date = date(1000, 1, 1).toordinal()
    max_date = date(9999, 12, 31).toordinal()

    # np.random.randint excludes its upper bound, hence the "+ 1": without it
    # the last day of the range (9999-12-31) could never be drawn.
    ordinals = np.random.randint(max_date - min_date + 1, size=n_dates) + min_date
    dates = [date.fromordinal(ordinal) for ordinal in ordinals]

    # The month name comes from MONTHS; strftime only formats the day and year.
    x = [MONTHS[dt.month - 1] + ' ' + dt.strftime('%d, %Y') for dt in dates]
    y = [dt.isoformat() for dt in dates]

    return x, y

In [6]:
# Seed NumPy's global generator before sampling the example dates.
np.random.seed(42)

n_dates = 3
x_example, y_example = random_dates(n_dates)
# Print the pairs as a two-column table, each column 25 characters wide.
print("{:25s}{:25s}".format("Input", "Target"))
print("-" * 50)
for idx in range(n_dates):
    print("{:25s}{:25s}".format(x_example[idx], y_example[idx]))

Input                    Target                   
--------------------------------------------------
September 20, 7075       7075-09-20               
May 15, 8579             8579-05-15               
January 11, 7103         7103-01-11               


In [7]:
# Sorted, de-duplicated alphabet of the input strings: the letters of the month
# names plus the digits, the comma and the space.
INPUT_CHARS = ''.join(sorted(set(''.join(MONTHS) + '0123456789, ')))
INPUT_CHARS

' ,0123456789ADFJMNOSabceghilmnoprstuvy'

In [8]:
# Alphabet of the ISO-formatted target strings: the digits and the hyphen.
OUTPUT_CHARS = '0123456789-'

In [9]:
def date_str_to_ids(date_str, chars=INPUT_CHARS):
    """Encode a string as the list of positions of its characters in ``chars``.

    Args:
        date_str: the string to encode.
        chars: the alphabet; a character's ID is its index in this sequence.

    Returns:
        A list with one integer ID per character of ``date_str``.

    Raises:
        ValueError: if a character of ``date_str`` does not occur in ``chars``.
    """
    char_ids = []
    for char in date_str:
        char_ids.append(chars.index(char))
    return char_ids

In [10]:
# Show the first example input next to its character IDs.
print(x_example[0])
date_str_to_ids(x_example[0], INPUT_CHARS)

September 20, 7075


[19, 23, 31, 34, 23, 28, 21, 23, 32, 0, 4, 2, 1, 0, 9, 2, 9, 7]

In [11]:
# Same check for the matching target, encoded with the output alphabet.
print(y_example[0])
date_str_to_ids(y_example[0], OUTPUT_CHARS)

7075-09-20


[7, 0, 7, 5, 10, 0, 9, 10, 2, 0]

In [12]:
def prepare_date_strs(date_strs, chars=INPUT_CHARS):
    """Encode a batch of strings as one dense tensor of character IDs.

    Each string is encoded with ``date_str_to_ids``; the resulting rows may
    differ in length, so they are first collected in a ragged tensor.

    Args:
        date_strs: iterable of strings to encode.
        chars: alphabet passed on to ``date_str_to_ids``.

    Returns:
        The dense tensor obtained from the ragged IDs after adding 1 to
        every ID.
    """
    id_rows = []
    for date_str in date_strs:
        id_rows.append(date_str_to_ids(date_str, chars))
    ragged_ids = tf.ragged.constant(id_rows, ragged_rank=1)
    # IDs are shifted up by one before densifying, so real characters never use ID 0.
    shifted_ids = ragged_ids + 1
    return shifted_ids.to_tensor()

def create_dateset(n_dates):
    """Build an encoded (inputs, targets) pair of ``n_dates`` random dates.

    Args:
        n_dates: number of random dates to generate.

    Returns:
        A tuple of two tensors: the inputs encoded with ``INPUT_CHARS`` and
        the targets encoded with ``OUTPUT_CHARS``.
    """
    input_strs, target_strs = random_dates(n_dates)
    encoded_inputs = prepare_date_strs(input_strs, INPUT_CHARS)
    encoded_targets = prepare_date_strs(target_strs, OUTPUT_CHARS)
    return encoded_inputs, encoded_targets

In [13]:
# Seed once, then draw the three splits one after another from the same generator.
np.random.seed(42)

x_train, y_train = create_dateset(10000)
x_valid, y_valid = create_dateset(2000)
x_test, y_test = create_dateset(2000)

In [14]:
# First encoded training input (IDs are shifted by one relative to date_str_to_ids).
x_train[0]

<tf.Tensor: shape=(18,), dtype=int32, numpy=
array([20, 24, 32, 35, 24, 29, 22, 24, 33,  1,  5,  3,  2,  1, 10,  3, 10,
        8])>

In [15]:
# First encoded training target.
y_train[0]

<tf.Tensor: shape=(10,), dtype=int32, numpy=array([ 8,  1,  8,  6, 11,  1, 10, 11,  3,  1])>

Basic seq2seq model

In [16]:
embedding_size = 32
# Every target has the same length; it fixes how many characters the decoder emits.
max_output_length = y_train.shape[1]

np.random.seed(42)
tf.random.set_seed(42)

# Encoder: embeds the input character IDs and returns only the final LSTM output.
encoder = keras.models.Sequential([
    # + 1 because prepare_date_strs shifts all IDs up by one.
    keras.layers.Embedding(input_dim=len(INPUT_CHARS) + 1, 
                           output_dim=embedding_size, 
                           input_shape=[None]),
    keras.layers.LSTM(128)
])

# Decoder: one class distribution per output position.
decoder = keras.models.Sequential([
    keras.layers.LSTM(128, return_sequences=True),
    keras.layers.Dense(len(OUTPUT_CHARS) + 1, activation='softmax')
])

# The encoder output is repeated once per output position and fed to the decoder.
model = keras.models.Sequential([
    encoder,
    keras.layers.RepeatVector(max_output_length),
    decoder
])

In [17]:
# Targets are integer IDs, hence the sparse categorical cross-entropy loss.
model.compile(loss='sparse_categorical_crossentropy', 
              optimizer='nadam', 
              metrics=['acc'])

history = model.fit(x_train, y_train, epochs=20, 
                    validation_data=(x_valid, y_valid))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [18]:
def ids_to_date_strs(ids, chars=OUTPUT_CHARS):
    """Decode sequences of character IDs back into strings.

    ID 0 is rendered as ``'?'``; any other ID ``i`` is rendered as
    ``chars[i - 1]``.

    Args:
        ids: iterable of ID sequences (one sequence per string).
        chars: alphabet used for decoding.

    Returns:
        A list with one decoded string per sequence in ``ids``.
    """
    decoded = []
    for sequence in ids:
        symbols = []
        for index in sequence:
            # '?' occupies position 0, so the shifted IDs line up with chars.
            symbols.append(('?' + chars)[index])
        decoded.append(''.join(symbols))
    return decoded

In [19]:
# Try the basic model on two new dates; argmax picks the most likely ID per position.
x_new = prepare_date_strs(['September 17, 2009', 'July 14, 1789'])
ids = np.argmax(model.predict(x_new), axis=-1)
for date_str in ids_to_date_strs(ids):
    print(date_str)

2009-09-17
1789-07-14


In [20]:
# Same experiment with a batch that contains only short inputs: the recorded output
# below shows both months decoded incorrectly.
x_new = prepare_date_strs(['May 02, 2020', 'July 14, 1789'])
ids = np.argmax(model.predict(x_new), axis=-1)
for date_str in ids_to_date_strs(ids):
    print(date_str)

2020-01-02
1789-01-14


In [21]:
# Width of the training inputs; prepare_date_strs_padded pads shorter batches up to it.
max_input_length = x_train.shape[1]

def prepare_date_strs_padded(date_strs):
    """Encode input strings and right-pad the batch to ``max_input_length``.

    Args:
        date_strs: iterable of input date strings.

    Returns:
        The tensor from ``prepare_date_strs``, padded on the right of axis 1
        when it is narrower than ``max_input_length``; wider batches are
        returned unchanged.
    """
    encoded = prepare_date_strs(date_strs)
    n_missing = max_input_length - encoded.shape[1]
    if n_missing > 0:
        # Pad only the end of the time axis; the batch axis is left alone.
        encoded = tf.pad(encoded, [[0, 0], [0, n_missing]])
    return encoded

def convert_date_strs(date_strs):
    """Translate input date strings with the current global ``model``.

    Args:
        date_strs: iterable of input date strings.

    Returns:
        A list of decoded output strings, one per input.
    """
    padded_inputs = prepare_date_strs_padded(date_strs)
    probas = model.predict(padded_inputs)
    # Keep the most likely character ID at every output position.
    best_ids = np.argmax(probas, axis=-1)
    return ids_to_date_strs(best_ids)

In [22]:
# With padded inputs the short dates from the previous experiment are decoded correctly.
convert_date_strs(['May 02, 2020', 'July 14, 1789'])

['2020-05-02', '1789-07-14']

Feeding the shifted targets to the decoder (teacher forcing)

In [23]:
# Start-of-sequence ID: one past the largest shifted output-character ID.
sos_id = len(OUTPUT_CHARS) + 1
sos_id

12

In [24]:
def shifted_output_sequences(y):
    """Build decoder inputs by shifting the targets one step to the right.

    Args:
        y: 2-D tensor of target IDs (one row per sequence).

    Returns:
        A tensor with the same shape as ``y``: each row starts with
        ``sos_id`` followed by the row of ``y`` without its last element.
    """
    n_sequences = len(y)
    sos_column = tf.fill(dims=(n_sequences, 1), value=sos_id)
    all_but_last = y[:, :-1]
    return tf.concat([sos_column, all_but_last], axis=1)

In [25]:
# Decoder inputs for each split: the targets shifted right behind a start-of-sequence ID.
x_train_decoder = shifted_output_sequences(y_train)
x_valid_decoder = shifted_output_sequences(y_valid)
x_test_decoder = shifted_output_sequences(y_test)

In [26]:
# Every row starts with sos_id (12).
x_train_decoder

<tf.Tensor: shape=(10000, 10), dtype=int32, numpy=
array([[12,  8,  1, ..., 10, 11,  3],
       [12,  9,  6, ...,  6, 11,  2],
       [12,  8,  2, ...,  2, 11,  2],
       ...,
       [12, 10,  8, ...,  2, 11,  4],
       [12,  2,  2, ...,  3, 11,  3],
       [12,  8,  9, ...,  8, 11,  3]])>

In [27]:
encoder_embedding_size = 32
decoder_embedding_size = 32
lstm_units = 128

np.random.seed(42)
tf.random.set_seed(42)

# Encoder: only the final LSTM states (h, c) are kept; the output is discarded.
encoder_input = keras.layers.Input(shape=[None], dtype=tf.int32)
encoder_embedding = keras.layers.Embedding(
    input_dim=len(INPUT_CHARS) + 1, 
    output_dim=encoder_embedding_size)(encoder_input)
_, encoder_state_h, encoder_state_c = keras.layers.LSTM(
    lstm_units, return_state=True)(encoder_embedding)
encoder_state = [encoder_state_h, encoder_state_c]

# Decoder: its vocabulary has one more entry than the output layer because the
# decoder input can also contain sos_id.
decoder_input = keras.layers.Input(shape=[None], dtype=tf.int32)
decoder_embedding = keras.layers.Embedding(
    input_dim=len(OUTPUT_CHARS) + 2, 
    output_dim=decoder_embedding_size)(decoder_input)
# The decoder LSTM starts from the encoder's final states.
decoder_lstm_output = keras.layers.LSTM(
    lstm_units, return_sequences=True)(decoder_embedding, 
                                       initial_state=encoder_state)
decoder_output = keras.layers.Dense(
    len(OUTPUT_CHARS) + 1, activation='softmax')(decoder_lstm_output)

model = keras.models.Model(inputs=[encoder_input, decoder_input], 
                           outputs=[decoder_output])

In [28]:
model.compile(loss='sparse_categorical_crossentropy', 
              optimizer='nadam', 
              metrics=['acc'])

# The model takes two inputs: the encoder input and the shifted targets.
history = model.fit([x_train, x_train_decoder], y_train, epochs=10, 
                    validation_data=([x_valid, x_valid_decoder], y_valid))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [29]:
# Same value as the sos_id defined earlier in the notebook; repeated here.
sos_id = len(OUTPUT_CHARS) + 1

def predict_date_strs(date_strs):
    """Decode dates one character at a time with the current global ``model``.

    The decoder input starts as a single ``sos_id`` column. At every step it
    is padded to ``max_output_length``, the whole model is run, and the most
    likely ID at the current position is appended to the sequence.

    Args:
        date_strs: iterable of input date strings.

    Returns:
        A list of decoded output strings, one per input.
    """
    encoder_in = prepare_date_strs_padded(date_strs)
    generated = tf.fill(dims=(len(encoder_in), 1), value=sos_id)
    for step in range(max_output_length):
        # Right-pad what has been generated so far to the full decoder length.
        n_missing = max_output_length - generated.shape[1]
        decoder_in = tf.pad(generated, [[0, 0], [0, n_missing]])
        probas = model.predict([encoder_in, decoder_in])
        # Only the prediction for the current position is used.
        step_probas = probas[:, step:step+1]
        next_ids = tf.argmax(step_probas, axis=-1, output_type=tf.int32)
        generated = tf.concat([generated, next_ids], axis=1)
    # Drop the leading start-of-sequence column before decoding.
    return ids_to_date_strs(generated[:, 1:])

In [30]:
# Step-by-step decoding with the teacher-forcing model.
predict_date_strs(['May 02, 2020', 'July 14, 1789'])

['2020-05-02', '1789-07-14']

TF-Addons seq2seq implementation

In [31]:
# Re-seed NumPy and TensorFlow before building the next model.
np.random.seed(42)
tf.random.set_seed(42)

In [32]:
import tensorflow_addons as tfa

encoder_embedding_size = 32
decoder_embedding_size = 32
units = 128

encoder_inputs = keras.layers.Input(shape=[None], dtype=tf.int32)

encoder_embeddings = keras.layers.Embedding(
    len(INPUT_CHARS) + 1, encoder_embedding_size)(encoder_inputs)

# Keep the encoder's final states; they initialise the decoder below.
encoder = keras.layers.LSTM(units, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_embeddings)
encoder_state = [state_h, state_c]

decoder_inputs = keras.layers.Input(shape=[None], dtype=tf.int32)

# The embedding layer, cell and output layer are kept in variables because the
# inference decoder built in a later cell reuses them.
decoder_embedding_layer = keras.layers.Embedding(
    len(OUTPUT_CHARS) + 2, decoder_embedding_size)
decoder_embeddings = decoder_embedding_layer(decoder_inputs)

decoder_cell = keras.layers.LSTMCell(units)
# No activation here: the softmax is applied separately below.
output_layer = keras.layers.Dense(len(OUTPUT_CHARS) + 1)

sampler = tfa.seq2seq.sampler.TrainingSampler()

decoder = tfa.seq2seq.basic_decoder.BasicDecoder(decoder_cell, 
                                                 sampler, 
                                                 output_layer=output_layer)
final_outputs, final_state, final_sequence_lengths = decoder(
    decoder_embeddings, initial_state=encoder_state)

y_proba = keras.layers.Activation('softmax')(final_outputs.rnn_output)

model = keras.models.Model(inputs=[encoder_inputs, decoder_inputs], outputs=[y_proba])

In [33]:
model.compile(loss='sparse_categorical_crossentropy', 
              optimizer='nadam', 
              metrics=['acc'])

# Same two-input training setup as the previous model, for 15 epochs.
history = model.fit([x_train, x_train_decoder], y_train, epochs=15, 
                    validation_data=([x_valid, x_valid_decoder], y_valid))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [34]:
# predict_date_strs reads the global `model`, which is now the TF-Addons model.
predict_date_strs(['May 02, 2020', 'July 14, 1789'])

['2020-03-02', '1789-07-14']

In [35]:
# Inference decoder: it reuses the decoder cell, output layer and embedding layer
# of the training model, but with a greedy sampler and a cap on the number of steps.
inference_sampler = tfa.seq2seq.sampler.GreedyEmbeddingSampler(
    embedding_fn=decoder_embedding_layer)
inference_decoder = tfa.seq2seq.basic_decoder.BasicDecoder(
    decoder_cell, inference_sampler, output_layer=output_layer, 
    maximum_iterations=max_output_length)
# One start-of-sequence ID per batch element.
batch_size = tf.shape(encoder_inputs)[:1]
start_tokens = tf.fill(dims=batch_size, value=sos_id)
final_outputs, final_state, final_sequence_lengths = inference_decoder(
    start_tokens, initial_state=encoder_state, 
    start_tokens=start_tokens, end_token=0)

# Only the encoder input is needed; the model outputs the sampled IDs directly.
inference_model = keras.models.Model(inputs=[encoder_inputs], 
                                     outputs=[final_outputs.sample_id])

In [36]:
def fast_predict_date_strs(date_strs):
    """Translate dates with a single call to the global ``inference_model``.

    Args:
        date_strs: iterable of input date strings.

    Returns:
        A list of decoded output strings, one per input.
    """
    padded_inputs = prepare_date_strs_padded(date_strs)
    # The inference model already outputs character IDs, so no argmax is needed.
    sampled_ids = inference_model.predict(padded_inputs)
    return ids_to_date_strs(sampled_ids)

In [37]:
# Same inputs as above, decoded by the inference model.
fast_predict_date_strs(['May 02, 2020', 'July 14, 1789'])

['2020-03-02', '1789-07-14']

In [38]:
# Time the step-by-step version, which runs model.predict once per output position.
%timeit predict_date_strs(['May 02, 2020', 'July 14, 1789'])

424 ms ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [39]:
# Time the inference-model version, which runs predict once per call.
%timeit fast_predict_date_strs((['May 02, 2020', 'July 14, 1789']))

44.6 ms ± 852 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


TF-Addons seq2seq implementation with scheduled sampler

In [40]:
# Re-seed NumPy and TensorFlow before building the next model.
np.random.seed(42)
tf.random.set_seed(42)

In [41]:
# n_epochs is also read by update_sampling_probability to schedule the sampler.
n_epochs = 20
encoder_embedding_size = 32
decoder_embedding_size = 32
units = 128

encoder_inputs = keras.layers.Input(shape=[None], dtype=tf.int32)

encoder_embeddings = keras.layers.Embedding(
    len(INPUT_CHARS) + 1, encoder_embedding_size)(encoder_inputs)

encoder = keras.layers.LSTM(units, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_embeddings)
encoder_state = [state_h, state_c]

decoder_inputs = keras.layers.Input(shape=[None], dtype=tf.int32)

decoder_embedding_layer = keras.layers.Embedding(
    len(OUTPUT_CHARS) + 2, decoder_embedding_size)
decoder_embeddings = decoder_embedding_layer(decoder_inputs)

decoder_cell = keras.layers.LSTMCell(units)
output_layer = keras.layers.Dense(len(OUTPUT_CHARS) + 1)

sampler = tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler(
    sampling_probability=0., embedding_fn=decoder_embedding_layer)
# Replace the probability with a tf.Variable so the epoch callback can change it
# with .assign() while training.
sampler.sampling_probability = tf.Variable(0.)

decoder = tfa.seq2seq.basic_decoder.BasicDecoder(decoder_cell, 
                                                 sampler, 
                                                 output_layer=output_layer)
final_outputs, final_state, final_sequence_lengths = decoder(
    decoder_embeddings, initial_state=encoder_state)

y_proba = keras.layers.Activation('softmax')(final_outputs.rnn_output)

model = keras.models.Model(inputs=[encoder_inputs, decoder_inputs], outputs=[y_proba])

In [42]:
def update_sampling_probability(epoch, logs):
    """Set the sampler's sampling probability for the epoch that is starting.

    The probability grows linearly with ``epoch`` and reaches 1.0 after
    ``n_epochs - 10`` epochs. The ramp length is never allowed to drop below
    one epoch and the result is clamped to [0, 1], so runs with
    ``n_epochs <= 10`` neither divide by zero nor assign a negative value.

    Args:
        epoch: index of the epoch, as passed by the Keras callback.
        logs: callback logs; unused.
    """
    # Linear ramp length; guarded so short runs still get a valid schedule.
    ramp_epochs = max(1, n_epochs - 10)
    proba = min(1.0, max(0.0, epoch / ramp_epochs))
    sampler.sampling_probability.assign(proba)

In [43]:
# Run update_sampling_probability at the start of every epoch.
sampling_probability_cb = keras.callbacks.LambdaCallback(
    on_epoch_begin=update_sampling_probability)

In [44]:
model.compile(loss='sparse_categorical_crossentropy', 
              optimizer='nadam', 
              metrics=['acc'])

# The callback adjusts the sampler's sampling probability as training progresses.
history = model.fit([x_train, x_train_decoder], y_train, epochs=n_epochs, 
                    validation_data=([x_valid, x_valid_decoder], y_valid), 
                    callbacks=[sampling_probability_cb])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [45]:
# Held in a tf.Variable so creative_predict_date_strs can change it with .assign().
softmax_temperature = tf.Variable(1.)

# Inference decoder that samples the next character instead of taking the argmax;
# it reuses the trained decoder cell, output layer and embedding layer.
inference_sampler = tfa.seq2seq.sampler.SampleEmbeddingSampler(
    embedding_fn=decoder_embedding_layer, 
    softmax_temperature=softmax_temperature)
inference_decoder = tfa.seq2seq.basic_decoder.BasicDecoder(
    decoder_cell, inference_sampler, output_layer=output_layer, 
    maximum_iterations=max_output_length)
# One start-of-sequence ID per batch element.
batch_size = tf.shape(encoder_inputs)[:1]
start_tokens = tf.fill(dims=batch_size, value=sos_id)
final_outputs, final_state, final_sequence_lengths = inference_decoder(
    start_tokens, initial_state=encoder_state, 
    start_tokens=start_tokens, end_token=0)

inference_model = keras.models.Model(inputs=[encoder_inputs], 
                                     outputs=[final_outputs.sample_id])

In [46]:
def creative_predict_date_strs(date_strs, temperature=1.0):
    """Translate dates with the sampling ``inference_model`` at a given temperature.

    Args:
        date_strs: iterable of input date strings.
        temperature: value assigned to the global ``softmax_temperature``
            variable before predicting.

    Returns:
        A list of decoded output strings, one per input.
    """
    # Update the shared variable first so the prediction below uses it.
    softmax_temperature.assign(temperature)
    padded_inputs = prepare_date_strs_padded(date_strs)
    sampled_ids = inference_model.predict(padded_inputs)
    return ids_to_date_strs(sampled_ids)

In [47]:
# Default temperature (1.0).
creative_predict_date_strs(['May 02, 2020', 'July 14, 1789'])

['2020-05-02', '1789-07-14']

In [48]:
# A much higher temperature: the recorded output below is no longer a valid date.
creative_predict_date_strs(['May 02, 2020', 'July 14, 1789'], 
                           temperature=5.)

['9122--3000', '1789-06411']

TFA seq2seq, the Keras subclassing API and attention mechanisms

In [49]:
class DateTranslation(keras.models.Model):
    """Encoder-decoder date translator with Luong attention (Keras subclassing API).

    The model owns an embedding + LSTM encoder, a decoder embedding, and an
    attention-wrapped LSTM cell shared by two TF-Addons decoders: one driven
    by a ``TrainingSampler`` and one driven by a ``GreedyEmbeddingSampler``.
    It relies on the notebook-level names ``INPUT_CHARS``, ``OUTPUT_CHARS``,
    ``max_output_length`` and ``sos_id``.
    """

    def __init__(self, units=128, encoder_embedding_size=32, 
                 decoder_embedding_size=32, **kwargs):
        """Create the layers.

        Args:
            units: size of the encoder LSTM, the decoder LSTM cell and the
                attention mechanism.
            encoder_embedding_size: output dimension of the encoder embedding.
            decoder_embedding_size: output dimension of the decoder embedding.
            **kwargs: forwarded to ``keras.models.Model``.
        """
        super().__init__(**kwargs)
        self.encoder_embedding = keras.layers.Embedding(
            input_dim=len(INPUT_CHARS) + 1, 
            output_dim=encoder_embedding_size)
        # return_sequences=True: the per-step outputs are handed to the attention
        # mechanism in call().
        self.encoder = keras.layers.LSTM(units, 
                                         return_sequences=True, 
                                         return_state=True)
        self.decoder_embedding = keras.layers.Embedding(
            input_dim=len(OUTPUT_CHARS) + 2, 
            output_dim=decoder_embedding_size)
        self.attention = tfa.seq2seq.LuongAttention(units)
        decoder_inner_cell = keras.layers.LSTMCell(units)
        self.decoder_cell = tfa.seq2seq.AttentionWrapper(
            cell=decoder_inner_cell, 
            attention_mechanism=self.attention)
        # The output layer is shared by both decoders below.
        output_layer = keras.layers.Dense(len(OUTPUT_CHARS) + 1)
        # Decoder used when call() runs with a truthy `training` flag.
        self.decoder = tfa.seq2seq.BasicDecoder(
            cell=self.decoder_cell, 
            sampler=tfa.seq2seq.sampler.TrainingSampler(), 
            output_layer=output_layer)
        # Decoder used otherwise; limited to max_output_length steps.
        self.inference_decoder = tfa.seq2seq.BasicDecoder(
            cell=self.decoder_cell, 
            sampler=tfa.seq2seq.sampler.GreedyEmbeddingSampler(
                embedding_fn=self.decoder_embedding), 
            output_layer=output_layer, 
            maximum_iterations=max_output_length)
        
    def call(self, inputs, training=None):
        """Run the model on an ``(encoder_input, decoder_input)`` pair.

        Args:
            inputs: pair of ID tensors ``(encoder_input, decoder_input)``.
            training: when truthy the training decoder is used; otherwise
                (including ``None``) the inference decoder is used.

        Returns:
            The softmax of the selected decoder's ``rnn_output``.
        """
        encoder_input, decoder_input = inputs
        encoder_embeddings = self.encoder_embedding(encoder_input)
        encoder_outputs, encoder_state_h, encoder_state_c = self.encoder(
            encoder_embeddings, 
            training=training)
        encoder_state = [encoder_state_h, encoder_state_c]
        
        # Give the attention mechanism the encoder outputs to attend over.
        self.attention(encoder_outputs, setup_memory=True)
        
        decoder_embeddings = self.decoder_embedding(decoder_input)
        # Start from the wrapper's initial state, then swap in the encoder's
        # final states as the cell state.
        decoder_initial_state = self.decoder_cell.get_initial_state(
            decoder_embeddings)
        decoder_initial_state = decoder_initial_state.clone(
            cell_state=encoder_state)
        
        if training:
            decoder_outputs, _, _ = self.decoder(
                decoder_embeddings, 
                initial_state=decoder_initial_state, 
                training=training)
            
        else:
            # One sos_id per batch row, built from the first input column.
            start_tokens = tf.zeros_like(encoder_input[:, 0]) + sos_id
            # NOTE(review): decoder_embeddings is still passed as the first argument,
            # but it does not appear to drive greedy decoding (fast_predict_date_strs_v2
            # feeds all-zero decoder inputs) — confirm against the TF-Addons docs.
            decoder_outputs, _, _ = self.inference_decoder(
                decoder_embeddings, 
                initial_state=decoder_initial_state, 
                start_tokens=start_tokens, 
                end_token=0)
        
        return tf.nn.softmax(decoder_outputs.rnn_output)

In [50]:
# Re-seed NumPy and TensorFlow before creating the attention model.
np.random.seed(42)
tf.random.set_seed(42)

In [51]:
# Rebinds the global `model`, which the prediction helpers read at call time.
model = DateTranslation()
model.compile(loss='sparse_categorical_crossentropy', 
              optimizer='nadam', 
              metrics=['acc'])
history = model.fit([x_train, x_train_decoder], y_train, epochs=25, 
                    validation_data=([x_valid, x_valid_decoder], y_valid))

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [52]:
def fast_predict_date_strs_v2(date_strs):
    """Translate dates with one ``predict`` call on the current global ``model``.

    The decoder input passed to the model is an all-zero placeholder of shape
    ``(batch, max_output_length)``.

    Args:
        date_strs: iterable of input date strings.

    Returns:
        A list of decoded output strings, one per input.
    """
    encoder_in = prepare_date_strs_padded(date_strs)
    placeholder_shape = (len(encoder_in), max_output_length)
    decoder_in = tf.zeros(shape=placeholder_shape, dtype=tf.int32)
    probas = model.predict([encoder_in, decoder_in])
    # Most likely character ID at every output position.
    best_ids = tf.argmax(probas, axis=-1)
    return ids_to_date_strs(best_ids)

In [53]:
# Single-call decoding with the attention model.
fast_predict_date_strs_v2((['May 02, 2020', 'July 14, 1789']))

['2020-05-02', '1789-07-14']