In [1]:
import argparse 
import datetime
import random

import numpy as np
import tensorflow as tf


args = argparse.Namespace()
args.seed = 101
args.epochs = 20
args.batch_size = 32
    
def random_date_output():
    ordinal_min = datetime.date(1000, 1, 1).toordinal()
    ordinal_max = datetime.date(9999, 12, 31).toordinal()
    return datetime.date.fromordinal(random.randint(ordinal_min, ordinal_max))

def output_date_to_input(date) -> str:
    months = ["January", "February", "March", "April", "May", "June",
          "July", "August", "September", "October", "November", "December"]
    
    return "{} {}, {}".format(months[date.month - 1], date.day, date.year)


def date_to_digits(str_date):
    str_date = str_date.lower()
    return [ord(s) for s in str_date]

def output_date_to_digits(str_date):
    return [10 if s=='-' else int(s) for s in str_date]

def input_tensor_to_date(np_date) -> str:
    date_str = "".join([chr(i) for i in np_date])
    return date_str[0].upper() + date_str[1:]

def output_tensor_to_date(tensor_date) -> str:
    return "".join(["-" if i==10 else str(i.numpy()) for i in tensor_date])

def prediction_to_output_tensor(prediction, axis=1):
    return tf.argmax(prediction, axis=axis, output_type=tf.int32)

def build_dataset(dates_num):
    input_list = []
    output_list = []
    for i in range(dates_num):
        rd = random_date_output()
        ird = output_date_to_input(rd)
        ird = date_to_digits(ird)
        rd = output_date_to_digits(str(rd))
        input_list.append(ird)
        output_list.append(rd)
    return tf.ragged.constant(input_list), tf.constant(output_list)


train_dataset = build_dataset(20000)
val_dataset = build_dataset(10000)

in_num = ord("z")
out_num = 11
max_out_len = len(train_dataset[1][0])

2022-02-16 19:21:41.753472: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# dec will only get the enc's hidden state repeated a number of times
# converge in 13 steps
def simple_enc_dec():    
    encoder = tf.keras.models.Sequential([
        tf.keras.layers.Embedding(in_num, 32, input_shape=[None]),
        tf.keras.layers.LSTM(64, return_sequences=False)])

    # RepeatVector is a must. we translate +- 17 words seq into 10 words seq. return_sequences = True for encoder leads up to 17 words output
    # with return_sequences = False, it won't be possible to reconstruct a full seq, but one letter.
    decoder = tf.keras.models.Sequential([
        tf.keras.layers.RepeatVector(max_out_len),
        tf.keras.layers.LSTM(64, return_sequences=True),
        tf.keras.layers.Dense(out_num, activation="softmax")])
    model = tf.keras.models.Sequential([encoder, decoder])

    model.compile(optimizer=tf.keras.optimizers.Nadam(learning_rate=1e-3),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
    monitor='val_loss'
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor=monitor, patience=3, mode='auto', restore_best_weights=True, verbose=1)
    reduce_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(monitor=monitor, factor=0.1, patience=2, min_delta=1e-4, mode='auto', verbose=1)
    encoder.summary()
    decoder.summary()
    model.summary()
    model.fit(train_dataset[0], train_dataset[1], batch_size=args.batch_size, epochs=args.epochs, validation_data=(val_dataset[0], val_dataset[1]), callbacks=[early_stopping, reduce_lr_on_plateau])

    for i in range(5):
        print("***")
        print(val_dataset[0][i])
        print(input_tensor_to_date(val_dataset[0][i]))
        prediction = model(val_dataset[0])[i]
        output_tensor = prediction_to_output_tensor(prediction)
        print(output_tensor)
        print(output_tensor_to_date(output_tensor))
    
if True:
    simple_enc_dec()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, None, 32)          3904      
                                                                 
 lstm_2 (LSTM)               (None, 64)                24832     
                                                                 
Total params: 28,736
Trainable params: 28,736
Non-trainable params: 0
_________________________________________________________________
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 repeat_vector_1 (RepeatVect  (None, 10, 64)           0         
 or)                                                             
                                                                 
 lstm_3 (LSTM)               (None, 10, 64)            33024     
                            



Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
***
tf.Tensor([106  97 110 117  97 114 121  32  49  54  44  32  52  54  49  51], shape=(16,), dtype=int32)
January 16, 4613
tf.Tensor([ 4  6  1  3 10  0  1 10  1  6], shape=(10,), dtype=int64)
4613-01-16
***
tf.Tensor([115 101 112 116 101 109  98 101 114  32  50  50  44  32  51  56  56  53], shape=(18,), dtype=int32)
September 22, 3885
tf.Tensor([ 3  8  8  5 10  0  9 10  2  2], shape=(10,), dtype=int64)
3885-09-22
***
tf.Tensor([106  97 110 117  97 114 121  32  49  50  44  32  51  56  53  49], shape=(16,), dtype=int32)
January 12, 3851
tf.Tensor([ 3  8  5  1 10  0  1 10  1  2], shape=(10,), dtype=int64)
3851-01-12
***
tf.Tensor([ 97 117 103 117 115 116  32  49  55  44  32  49  53  50  50], shape=(15,), dtype=int32)
August 17, 1522
tf.Tensor([ 1  5  2  2 10  0  8 10  1  

In [45]:
# train the dec with the shifted result seq.
# converge in 8 steps
sos = out_num # 10 digits + "-" = [0, 10] => "sos" = 11
def build_decoder_inputs(targets):
    soss = tf.fill([targets.shape[0], 1], sos) # targets.shape == decoder_inputs
    return tf.concat([soss, targets[:, :-1]], axis=1) # need only previous states for the targets in the decoder in -> no last char

def enc_shifted_dec():
    train_decoder_inputs = build_decoder_inputs(train_dataset[1])
    val_decoder_inputs = build_decoder_inputs(val_dataset[1])
    
    enc_in = tf.keras.layers.Input([None], ragged=True)
    enc_emb = tf.keras.layers.Embedding(in_num, 32)(enc_in)
    enc_out, enc_m_state, enc_c_state = tf.keras.layers.LSTM(64, return_sequences=False, return_state=True)(enc_emb)
    
    dec_in = tf.keras.layers.Input([max_out_len])
    dec_emb = tf.keras.layers.Embedding(sos + 1, 4)(dec_in)
    dec_lstm = tf.keras.layers.LSTM(64, return_sequences=True)(dec_emb, initial_state=[enc_m_state, enc_c_state])
    dec_out = tf.keras.layers.Dense(out_num, activation="softmax")(dec_lstm)
    
    model = tf.keras.Model(inputs=[enc_in, dec_in], outputs=[dec_out])
    model.summary()
    model.compile(optimizer=tf.keras.optimizers.Nadam(learning_rate=1e-3),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
    monitor='val_loss'
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor=monitor, patience=3, mode='auto', restore_best_weights=True, verbose=1)
    reduce_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(monitor=monitor, factor=0.1, patience=2, min_delta=1e-4, mode='auto', verbose=1)
    model.fit([train_dataset[0], train_decoder_inputs], train_dataset[1], batch_size=args.batch_size, epochs=args.epochs, validation_data=([val_dataset[0], val_decoder_inputs], val_dataset[1]), callbacks=[early_stopping, reduce_lr_on_plateau])
    
    def try_predict(num=5):
        x = val_dataset[0][:num][:]
        for i in range(num):  
            print(input_tensor_to_date(x[i]))
        x_sos = tf.fill([x.shape[0], 1], 11)
        y_pred = tf.zeros([x.shape[0], 0], dtype=tf.int32)
        y = tf.zeros([x.shape[0], 0], dtype=tf.int32)
        for i in range(max_out_len):
            x_dec = tf.concat([x_sos, y_pred[:, :i], tf.zeros([x.shape[0], max_out_len - i - 1], dtype=tf.int32)], axis=1)
            prediction = model.predict([x, x_dec])
            y_pred = prediction_to_output_tensor(prediction, axis=2)
            y = tf.concat([y, tf.reshape(y_pred[:, i], [-1, 1])], axis=1) # indexing one column creates a vector
            
        assert(tf.reduce_all(tf.equal(y_pred, y)))
        print(y)
        for i in range(num):  
            print(output_tensor_to_date(y[i]))
            
    try_predict()
    return model
    
if True:
    enc_shifted_dec()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 10)]         0           []                               
                                                                                                  
 embedding_78 (Embedding)       (None, None, 32)     3904        ['input_3[0][0]']                
                                                                                                  
 embedding_79 (Embedding)       (None, 10, 4)        48          ['input_4[0][0]']                
                                                                                            



Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 19: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 20/20
Epoch 20: early stopping
April 25, 6002
August 18, 3686
November 25, 7235
September 29, 9626
December 6, 5419
tf.Tensor(
[[ 6  0  0  2 10  0  4 10  2  5]
 [ 3  6  8  6 10  0  8 10  1  8]
 [ 7  2  3  5 10  1  1 10  2  5]
 [ 9  6  2  6 10  0  9 10  2  9]
 [ 5  4  1  9 10  1  2 10  0  6]], shape=(5, 10), dtype=int32)
6002-04-25
3686-08-18
7235-11-25
9626-09-29
5419-12-06


In [None]:
import tensorflow_addons as tfa

# doesn't work. Inconsistent shapes
def tfa_attention():
    
    class DateTranslation(tf.keras.models.Model):
        def __init__(self, units=128, encoder_embedding_size=32,
                     decoder_embedding_size=32, **kwargs):
            super().__init__(**kwargs)
            self.encoder_embedding = tf.keras.layers.Embedding(
                input_dim=in_num,
                output_dim=encoder_embedding_size)
            self.encoder = tf.keras.layers.LSTM(units,
                                             return_sequences=True,
                                             return_state=True)
            self.decoder_embedding = tf.keras.layers.Embedding(
                input_dim=out_num + 2,
                output_dim=decoder_embedding_size)
            self.attention = tfa.seq2seq.LuongAttention(units)
            decoder_inner_cell = tf.keras.layers.LSTMCell(units)
            self.decoder_cell = tfa.seq2seq.AttentionWrapper(
                cell=decoder_inner_cell,
                attention_mechanism=self.attention)
            output_layer = tf.keras.layers.Dense(out_num)
            self.decoder = tfa.seq2seq.BasicDecoder(
                cell=self.decoder_cell,
                sampler=tfa.seq2seq.sampler.TrainingSampler(),
                output_layer=output_layer)
            self.inference_decoder = tfa.seq2seq.BasicDecoder(
                cell=self.decoder_cell,
                sampler=tfa.seq2seq.sampler.GreedyEmbeddingSampler(
                    embedding_fn=self.decoder_embedding),
                output_layer=output_layer,
                maximum_iterations=max_out_len)

        def call(self, inputs, training=None):
            encoder_input, decoder_input = inputs
            encoder_embeddings = self.encoder_embedding(encoder_input)
            encoder_outputs, encoder_state_h, encoder_state_c = self.encoder(
                encoder_embeddings,
                training=training)
            encoder_state = [encoder_state_h, encoder_state_c]

            self.attention(encoder_outputs,
                           setup_memory=True)

            decoder_embeddings = self.decoder_embedding(decoder_input)

            decoder_initial_state = self.decoder_cell.get_initial_state(
                decoder_embeddings)
            decoder_initial_state = decoder_initial_state.clone(
                cell_state=encoder_state)

            if training:
                decoder_outputs, _, _ = self.decoder(
                    decoder_embeddings,
                    initial_state=decoder_initial_state,
                    training=training)
            else:
                start_tokens = tf.zeros_like(encoder_input[:, 0]) + sos
                decoder_outputs, _, _ = self.inference_decoder(
                    decoder_embeddings,
                    initial_state=decoder_initial_state,
                    start_tokens=start_tokens,
                    end_token=0)
            tf.print(decoder_outputs.rnn_output.shape)
            return tf.nn.softmax(decoder_outputs.rnn_output)
    
    train_decoder_inputs = build_decoder_inputs(train_dataset[1])
    val_decoder_inputs = build_decoder_inputs(val_dataset[1])
            
    model = DateTranslation()
    model((train_dataset[0].to_tensor(0), train_decoder_inputs))
    optimizer = tf.keras.optimizers.Nadam()
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
                  metrics=["accuracy"])
    model.summary()
    history = model.fit([train_dataset[0].to_tensor(0), train_decoder_inputs], train_dataset[1], epochs=25,
                        validation_data=[[val_dataset[0].to_tensor(0), val_decoder_inputs], val_dataset[1]])

if True:
    tfa_attention()