In [155]:
import argparse 
import datetime
import random

import numpy as np
import tensorflow as tf


# Run configuration: the RNG seed plus the hyper-parameters passed to model.fit.
args = argparse.Namespace()
args.seed = 101
args.epochs = 20
args.batch_size = 32

# Apply the seed to every RNG in scope. Without these calls ``args.seed`` is
# never used, so the generated dates and the initial weights change per run.
random.seed(args.seed)         # random_date_output() draws from ``random``
np.random.seed(args.seed)      # not used directly here; seeded for completeness
tf.random.set_seed(args.seed)  # global seed for TensorFlow/Keras random ops
    
def random_date_output():
    """Draw a random ``datetime.date`` with a four-digit year.

    The date is picked uniformly (by proleptic Gregorian ordinal) from the
    inclusive range 1000-01-01 .. 9999-12-31 using the global ``random`` RNG.

    Returns:
        datetime.date: the sampled date.
    """
    first_day, last_day = datetime.date(1000, 1, 1), datetime.date(9999, 12, 31)
    # A single randint call on the ordinal range; both ends are inclusive.
    picked_ordinal = random.randint(first_day.toordinal(), last_day.toordinal())
    return datetime.date.fromordinal(picked_ordinal)

def output_date_to_input(date) -> str:
    """Render a date in long English form, e.g. ``"December 4, 9722"``.

    Args:
        date: object exposing ``month`` (1-12), ``day`` and ``year``
            attributes, such as a ``datetime.date``.

    Returns:
        str: ``"<Month name> <day>, <year>"`` with no zero padding.
    """
    month_names = (
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December",
    )
    # ``date.month`` is 1-based, the tuple is 0-based.
    month_label = month_names[date.month - 1]
    return "{} {}, {}".format(month_label, date.day, date.year)


def date_to_np(str_date):
    """Encode text as the code points of its lower-cased characters.

    Args:
        str_date: the string to encode.

    Returns:
        np.ndarray: 1-D array with one ``ord`` value per character.
    """
    code_points = list(map(ord, str_date.lower()))
    return np.array(code_points)

def output_date_to_np(str_date):
    """Encode a digits-and-dashes string as integer tokens.

    Each digit character maps to its integer value and ``'-'`` maps to 10.

    Args:
        str_date: string made of decimal digits and ``'-'``.

    Returns:
        np.ndarray: 1-D array with one token per character.
    """
    tokens = []
    for ch in str_date:
        if ch == '-':
            tokens.append(10)  # separator token
        else:
            tokens.append(int(ch))
    return np.array(tokens)

def input_tensor_to_date(np_date):
    """Decode a sequence of character codes into display text.

    Each element is converted with ``chr`` and the first character of the
    result is upper-cased; the remaining characters are left untouched.

    Args:
        np_date: iterable of integer character codes.

    Returns:
        str: the decoded text, or ``""`` when the sequence is empty.
    """
    date_str = "".join(chr(i) for i in np_date)
    # Slice rather than index so an empty sequence yields "" instead of
    # raising IndexError on ``date_str[0]``.
    return date_str[:1].upper() + date_str[1:]

def output_tensor_to_date(tensor_date):
    """Decode integer tokens back into a digits-and-dashes string.

    Token 10 becomes ``"-"``; any other token is rendered as
    ``str(token.numpy())``, so every element must expose ``.numpy()``.

    Args:
        tensor_date: iterable of scalar tokens.

    Returns:
        str: the decoded characters joined together.
    """
    chars = []
    for token in tensor_date:
        if token == 10:
            chars.append("-")
        else:
            chars.append(str(token.numpy()))
    return "".join(chars)

def prediction_to_output_tensor(prediction):
    """Collapse per-class scores into the index of the best class.

    Args:
        prediction: tensor whose last axis holds the per-class scores, e.g.
            ``(time, classes)`` for one sample or ``(batch, time, classes)``
            for a batch.

    Returns:
        Tensor of class indices with the last axis removed
        (``tf.argmax`` returns int64 by default).
    """
    # Reduce over the last (class) axis instead of a hard-coded axis=1.
    # For the 2-D single-sample case the result is identical; for a batched
    # 3-D output, axis=1 would have reduced over time steps instead.
    return tf.argmax(prediction, axis=-1)

def build_dataset(dates_num):
    """Generate ``dates_num`` random (input text, target date) training pairs.

    For each random date the input is its long English form encoded by
    ``date_to_np`` and the target is ``str(date)`` (ISO ``YYYY-MM-DD``)
    encoded by ``output_date_to_np``.

    Args:
        dates_num: number of examples to generate.

    Returns:
        tuple: ``(inputs, targets)`` where ``inputs`` is a ``tf.RaggedTensor``
        (the encoded texts differ in length) and ``targets`` is a dense
        ``tf.Tensor`` with one row of tokens per example.
    """
    input_list = []
    output_list = []
    for _ in range(dates_num):
        date = random_date_output()
        # Plain Python lists are collected so both tensors are built once,
        # after the loop.
        input_list.append(date_to_np(output_date_to_input(date)).tolist())
        output_list.append(output_date_to_np(str(date)).tolist())
    return tf.ragged.constant(input_list), tf.constant(output_list)


# Each split is an (inputs, targets) pair as returned by build_dataset.
train_dataset = build_dataset(20000)
val_dataset = build_dataset(10000)

# Intended as the vocabulary size for the input character codes.
# NOTE(review): as an Embedding ``input_dim`` this admits codes 0..121, so
# "z" (122) itself would be out of range. No month name contains "z", so the
# generated data stays in range.
max_int_len = ord("z")
# Length of one target row (tokens per encoded output date).
max_out_len = len(train_dataset[1][0])
# Output classes: digits 0-9 plus token 10 for "-".
out_num = 11
# Was ``print(max_out)``: that name is never defined in this notebook and
# raises NameError on a fresh kernel.
print(max_out_len)

10


In [113]:
# Encoder: embed each character code, then summarise the variable-length
# input as the final 64-unit LSTM state.
# Uses ``max_int_len`` (defined with the dataset constants); the previous
# ``max_int`` is not defined anywhere in the notebook.
encoder = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(max_int_len, 32, input_shape=[None]),
    tf.keras.layers.LSTM(64, return_sequences=False)])

# Decoder: RepeatVector is required. The input is a variable-length character
# sequence (up to ~18 characters) while the output always has ``max_out_len``
# tokens. With return_sequences=True the encoder would emit one step per input
# character; with return_sequences=False it emits a single vector, which must
# be repeated ``max_out_len`` times so the decoder LSTM can produce the full
# output sequence instead of a single token.
decoder = tf.keras.models.Sequential([
    tf.keras.layers.RepeatVector(max_out_len),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dense(out_num, activation="softmax")])
model = tf.keras.models.Sequential([encoder, decoder])

model.compile(optimizer=tf.keras.optimizers.Nadam(learning_rate=1e-3),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Both callbacks watch the validation loss: cut the learning rate after 2
# stagnant epochs, stop after 3 and restore the best weights.
monitor = 'val_loss'
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor=monitor, patience=3, mode='auto',
    restore_best_weights=True, verbose=1)
reduce_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor=monitor, factor=0.1, patience=2, min_delta=1e-4,
    mode='auto', verbose=1)

model.summary()
model.fit(train_dataset[0], train_dataset[1],
          batch_size=args.batch_size, epochs=args.epochs,
          validation_data=(val_dataset[0], val_dataset[1]),
          callbacks=[early_stopping, reduce_lr_on_plateau])

Model: "sequential_82"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_80 (Sequential)  (None, 64)                28736     
                                                                 
 sequential_81 (Sequential)  (None, 10, 11)            33739     
                                                                 
Total params: 62,475
Trainable params: 62,475
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20




Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x153aa04f0>

In [158]:
# Inspect one validation example end to end: raw character codes, decoded
# input text, predicted tokens and the decoded output date.
sample_index = 2
sample_input = val_dataset[0][sample_index]
print(sample_input)
print(input_tensor_to_date(sample_input))
# Run the model on this row only (the slice keeps the batch dimension) rather
# than pushing the whole validation set through it to read one prediction.
prediction = model(val_dataset[0][sample_index:sample_index + 1])[0]
output_tensor = prediction_to_output_tensor(prediction)
print(output_tensor)
print(output_tensor_to_date(output_tensor))

tf.Tensor([100 101  99 101 109  98 101 114  32  52  44  32  57  55  50  50], shape=(16,), dtype=int32)
December 4, 9722
tf.Tensor([ 9  7  2  2 10  1  2 10  0  4], shape=(10,), dtype=int64)
9722-12-04


In [154]:
# Scratch cell: quick sanity checks of the building blocks used by the helper
# functions. Only the printed values and the last expression are displayed.

# Code points of sample input characters (letters, digits, comma, space).
for s in "abc0123, ":
    print(ord(s))
    
# Indexing a nested list vs. building a 2-D array from per-row arrays.
[[1, 2],[3, 4]][0][:]
np.array([np.array([1, 2]), np.array([3, 4])])

# argmax along axis 1 of a 2-D tensor.
tf.argmax(tf.constant([[1, 2, 3], [2, 3, 4]]), axis=1)
# Same decoding expression as output_tensor_to_date; displays '123'.
"".join(["-" if i==10 else str(i.numpy()) for i in tf.constant([1, 2, 3])])

97
98
99
48
49
50
51
44
32


'123'