In [1]:
from PIL import ImageFont, ImageDraw, Image
from fontTools.ttLib import TTFont

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

import time
import datetime
import multiprocessing

In [2]:
# Hyper-parameters shared by the whole notebook.
VOCAB = 28        # size of the token vocabulary (ids 0..27 as produced by `tokenizer`)
EBD_DIM = 256     # passed as `embedding_dim` to the encoder and decoders
UNIT_DIM = 128    # passed as `hidden_size` to the decoders
BATCH_SIZE = 256  # number of samples per batch in the tf.data pipelines

## Load Data

In [3]:
class Glyph(object):
    """Render single characters to square grayscale bitmaps.

    Several fonts may be given; a character is drawn with the first font whose
    Unicode cmap contains its codepoint. Rendered bitmaps are cached per
    character so repeated characters are only drawn once.
    """

    def __init__(self, fonts, size=64):
        """Load the fonts and index the codepoints each one supports.

        Args:
            fonts: list of font file paths, in lookup priority order.
            size: edge length (pixels) of the square output image. The glyph
                itself is rendered at 80% of this size and offset by `pad`.
        """
        # one codepoint set per font, in the same order as `fonts`
        self.codepoints = [set() for _ in fonts]
        self.size = int(size * 0.8)
        self.size_img = size
        self.pad = (size - self.size) // 2
        self.fonts = [ImageFont.truetype(f, self.size) for f in fonts]
        # character -> rendered bitmap, to avoid re-drawing duplicated characters
        self.cache = {}
        for cp, font_path in zip(self.codepoints, fonts):
            ttfont = TTFont(font_path)
            try:
                # collect the codepoints of every Unicode cmap subtable
                for cmap in ttfont['cmap'].tables:
                    if not cmap.isUnicode():
                        continue
                    cp.update(cmap.cmap)
            finally:
                # the TTFont is only needed for its cmap; release the file handle
                ttfont.close()

    def draw(self, ch):
        """Return the bitmap of `ch`, or None if no loaded font contains it.

        The result is a float32 array of shape (size_img, size_img) with values
        scaled to [0, 1] (black background, white glyph).
        """
        if ch in self.cache:
            return self.cache[ch]

        # search among fonts, use the first one that has the codepoint
        codepoint = ord(ch)
        for supported, candidate in zip(self.codepoints, self.fonts):
            if codepoint in supported:
                font = candidate
                break
        else:
            return None

        img = Image.new('L', (self.size_img, self.size_img), 0)
        draw = ImageDraw.Draw(img)
        # only the rendering offset is needed; the reported size is ignored
        _size, (offset_x, offset_y) = font.font.getsize(ch)
        # NOTE(review): the extra +4 vertical shift is an empirical nudge — confirm intent
        draw.text((self.pad - offset_x, self.pad - offset_y + 4), ch, font=font, fill=255, stroke_fill=255)
        img_array = np.asarray(img, dtype='float32') / 255
        self.cache[ch] = img_array

        return img_array

In [4]:
# Shared renderer used by the preprocessing workers and by `evaluate`.
# Fonts are searched in this order; the first one containing a character wins.
glyphbook = Glyph([
    'data/fonts/HanaMinA.otf',
    'data/fonts/HanaMinB.otf',
])

def _mapping(item):
    """Render one chart row; worker function for `preprocess_chart`.

    `item` is a (char, code, dup_total, dup_curr) row. Returns the same row with
    the character replaced by its bitmap, or None when `glyphbook` cannot draw
    the character.
    """
    char, code, dup_total, dup_curr = item
    bitmap = glyphbook.draw(char)
    if bitmap is None:
        return None
    return bitmap, code, dup_total, dup_curr

def preprocess_chart(chart, cores=multiprocessing.cpu_count()):
    """Render every row of `chart` in parallel and split the result into arrays.

    Rows whose character cannot be drawn (`_mapping` returns None) are dropped.

    Returns:
        (glyphs, codes, dup_total, dup_curr) as numpy arrays; `glyphs` gets a
        trailing axis of size 1 appended.
    """
    with multiprocessing.Pool(processes=cores) as pool:
        rendered = [row for row in pool.map(_mapping, chart.values) if row is not None]

    glyphs = [row[0] for row in rendered]
    codes = [row[1] for row in rendered]
    dup_total = [row[2] for row in rendered]
    dup_curr = [row[3] for row in rendered]
    return (
        np.expand_dims(np.array(glyphs), -1),
        np.array(codes),
        np.array(dup_total),
        np.array(dup_curr),
    )

In [5]:
def tokenizer(code_table, max_len=5):
    """Convert an array of lowercase code strings into padded token id rows.

    Token ids: 0 is the start token (always column 0), 'a'..'z' map to 1..26,
    and 27 is the end/padding token that fills the unused trailing positions.

    Args:
        code_table: 1-D sequence of code strings (each 0..max_len letters).
        max_len: maximum code length; every row has max_len + 1 columns.

    Returns:
        int64 array of shape (len(code_table), max_len + 1).

    Raises:
        ValueError: if a code is longer than `max_len`.
    """
    end_token = 27
    tokens = np.full((len(code_table), max_len + 1), end_token, dtype='int64')
    tokens[:, 0] = 0
    for row, code in enumerate(code_table):
        if len(code) > max_len:
            raise ValueError('Code {!r} is longer than max_len={}'.format(code, max_len))
        if code:
            # ord('a') == 97, so subtracting 96 maps a-z onto 1-26
            tokens[row, 1:len(code) + 1] = [ord(letter) - 96 for letter in code]
    return tokens

In [6]:
# Tab-separated table without a header row: one (character, code) pair per line.
# keep_default_na=False keeps strings such as "NA" as text instead of NaN.
code_chart = pd.read_csv(
    'data/cangjie6.txt',
    sep='\t',
    header=None,
    names=['Char', 'Code'],
    keep_default_na=False,
)

In [7]:
# Map every character to the sorted list of all codes recorded for it.
count = {}
for char, code in code_chart.values:
    count.setdefault(char, []).append(code)
# Sort each list once at the end instead of re-sorting after every append.
for _codes_of_char in count.values():
    _codes_of_char.sort()

In [8]:
# Longest code in the chart, and the largest number of codes any one character has.
MAX_LEN = code_chart['Code'].map(len).max()
MAX_DUP = max(len(codes_of_char) for codes_of_char in count.values())

In [9]:
# DuplicateTotal: how many codes the row's character has in total.
code_chart['DuplicateTotal'] = code_chart['Char'].map(lambda ch: len(count[ch]))
# DuplicateCurrent: 1-based rank of this row's code among the character's sorted codes.
code_chart['DuplicateCurrent'] = [
    count[ch].index(cd) + 1
    for ch, cd in zip(code_chart['Char'], code_chart['Code'])
]

In [10]:
glyphs, codes, dups_total, dups_curr = preprocess_chart(code_chart)
tokens = tokenizer(codes)
# Code length = number of letter tokens, i.e. ids strictly between the start
# token (0) and the end/padding token (VOCAB - 1).
lengths = ((tokens > 0) & (tokens < VOCAB - 1)).sum(axis=1)
# One-hot encode by indexing a single identity matrix with all (value - 1)
# indices at once, rather than building a new identity matrix per row.
lengths = np.identity(MAX_LEN, dtype='int64')[np.asarray(lengths, dtype='int64') - 1]
dups_total = np.identity(MAX_DUP, dtype='int64')[np.asarray(dups_total, dtype='int64') - 1]
dups_curr = np.identity(MAX_DUP, dtype='int64')[np.asarray(dups_curr, dtype='int64') - 1]
# free the raw tables; only the encoded arrays are used from here on
del code_chart, codes, count

In [11]:
# Hold out 10% of the samples for validation. All five arrays are split with the
# same permutation (fixed random_state), so rows stay aligned across arrays.
# train_test_split returns a (train, validation) pair per input array, in input order.
(train_glyphs, validation_glyphs,
 train_tokens, validation_tokens,
 train_lengths, validation_lengths,
 train_dups_total, validation_dups_total,
 train_dups_curr, validation_dups_curr) = train_test_split(
    glyphs, tokens, lengths, dups_total, dups_curr, test_size=0.1, random_state=1225)
# the unsplit arrays are no longer needed
del glyphs, tokens, lengths, dups_total, dups_curr

In [12]:
# Batches per epoch. `batch()` below keeps the final, smaller batch, so the
# count is the ceiling of n / BATCH_SIZE (written with integer arithmetic).
# Using floor division here would make the per-epoch averages divide a sum over
# all batches by one batch too few, and could yield 0 for a small validation set.
num_steps = -(-len(train_glyphs) // BATCH_SIZE)
num_steps_val = -(-len(validation_glyphs) // BATCH_SIZE)

# Each element is (glyph, tokens, length one-hot, total-dups one-hot, current-dup one-hot).
dataset = tf.data.Dataset.from_tensor_slices((train_glyphs, train_tokens, train_lengths, train_dups_total, train_dups_curr))
# shuffle buffer covers the whole training set
dataset = dataset.shuffle(train_glyphs.shape[0]).batch(BATCH_SIZE)
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

val_dataset = tf.data.Dataset.from_tensor_slices((validation_glyphs, validation_tokens, validation_lengths, validation_dups_total, validation_dups_curr))
val_dataset = val_dataset.shuffle(validation_glyphs.shape[0]).batch(BATCH_SIZE)
val_dataset = val_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# the numpy copies are now owned by the datasets; drop the notebook-level references
del train_glyphs, validation_glyphs, train_tokens, validation_tokens, train_lengths, validation_lengths
del train_dups_total, validation_dups_total, train_dups_curr, validation_dups_curr

## Model

In [13]:
class Res_CNN(tf.keras.Model):
    """Three stacked 'same'-padded convolutions with two additive skip connections."""

    def __init__(self, feature_dim, kernel_size):
        super(Res_CNN, self).__init__()
        conv = tf.keras.layers.Convolution2D
        self.cnn1 = conv(feature_dim, kernel_size, padding='same')
        self.cnn2 = conv(feature_dim, kernel_size, padding='same')
        self.cnn3 = conv(feature_dim, kernel_size, padding='same')

    def call(self, x):
        """Return cnn3(cnn2(h) + h) + cnn2(h), where h = cnn1(x)."""
        first = self.cnn1(x)
        second = self.cnn2(first)
        # skip connection 1: output of cnn1 is added to the input of cnn3
        third = self.cnn3(second + first)
        # skip connection 2: output of cnn2 is added to the final output
        return third + second

In [14]:
class CNN_Encoder(tf.keras.Model):
    """Convolutional encoder turning a glyph image into a sequence of feature vectors.

    Three Res_CNN stages of growing width (embedding_dim // 16, // 4, full), each
    followed by batch normalisation and ReLU; the first two are also followed by
    2x2 max pooling. The spatial grid is then flattened into a sequence and
    projected by a ReLU dense layer.
    """

    def __init__(self, embedding_dim):
        super(CNN_Encoder, self).__init__()
        layers = tf.keras.layers
        self.res_cnn1 = Res_CNN(embedding_dim // 16, (3, 3))
        self.norm1 = layers.BatchNormalization()
        self.pool1 = layers.MaxPool2D((2, 2))
        self.res_cnn2 = Res_CNN(embedding_dim // 4, (3, 3))
        self.norm2 = layers.BatchNormalization()
        self.pool2 = layers.MaxPool2D((2, 2))
        self.res_cnn3 = Res_CNN(embedding_dim, (3, 3))
        self.norm3 = layers.BatchNormalization()
        self.fc = layers.Dense(embedding_dim, activation='relu')

    def call(self, x, training=True):
        """Encode a batch of images.

        For 64x64 inputs the spatial size goes 64 -> 32 -> 16 through the two
        pooling layers, so the output is (batch_size, 256, embedding_dim).
        Dropout (rate 0.4) is applied before the dense layer only when
        `training` is true.
        """
        stages = (
            (self.res_cnn1, self.norm1, self.pool1),
            (self.res_cnn2, self.norm2, self.pool2),
            (self.res_cnn3, self.norm3, None),  # last stage is not pooled
        )
        for conv, norm, pool in stages:
            x = tf.nn.relu(norm(conv(x)))
            if pool is not None:
                x = pool(x)

        # flatten the spatial grid: (batch, H, W, C) -> (batch, H * W, C)
        x = tf.reshape(x, [x.shape[0], -1, x.shape[-1]])
        if training:
            x = tf.nn.dropout(x, rate=0.4)
        return self.fc(x)

In [15]:
class Bahdanau_Attention(tf.keras.Model):
    """Additive attention over a sequence of encoder features."""

    def __init__(self, attention_dim):
        super(Bahdanau_Attention, self).__init__()
        dense = tf.keras.layers.Dense
        self.W1 = dense(attention_dim)  # projects the features
        self.W2 = dense(attention_dim)  # projects the query (hidden state)
        self.V = dense(1)               # reduces each score vector to a scalar

    def call(self, features, hidden):
        """Attend over `features` using `hidden` as the query.

        Args:
            features: (batch_size, seq_len, feature_dim) encoder output.
            hidden: (batch_size, query_dim) query vector.

        Returns:
            context_vector: (batch_size, feature_dim) attention-weighted sum.
            attention_weights: (batch_size, seq_len, 1), softmax over seq_len.
        """
        # add a length-1 sequence axis so the query broadcasts against the features
        query = tf.expand_dims(hidden, 1)
        energy = tf.nn.tanh(self.W1(features) + self.W2(query))
        attention_weights = tf.nn.softmax(self.V(energy), axis=1)
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)
        return context_vector, attention_weights

In [16]:
class Simple_Decoder(tf.keras.Model):
    """Non-recurrent decoder: predicts the token at a given position from attention alone.

    `max_length` is accepted for signature symmetry but is not used.
    """

    def __init__(self, embedding_dim, max_length, hidden_size, vocab_size):
        super(Simple_Decoder, self).__init__()
        layers = tf.keras.layers
        self.embedding = layers.Embedding(vocab_size, embedding_dim)
        self.attention = Bahdanau_Attention(hidden_size)
        self.fc1 = layers.Dense(hidden_size, activation='relu')
        self.fc2 = layers.Dense(vocab_size)

    def call(self, feature, position):
        """Return (logits over the vocabulary, attention weights) for `position`.

        Args:
            feature: encoder output, (batch_size, seq_len, feature_dim).
            position: (batch_size,) integer position ids.
        """
        # the position embedding, (batch_size, embedding_dim), is the attention query
        query = self.embedding(position)
        context, weights = self.attention(feature, query)
        # (batch_size, hidden_size) -> (batch_size, vocab_size)
        logits = self.fc2(self.fc1(context))
        return logits, weights

In [17]:
class Length_Decoder(tf.keras.Model):
    """Classifier head over encoder features producing `max_length` logits.

    Used both to predict the code length and (with max_length = MAX_DUP) the
    number of duplicate codes of a character.
    """

    def __init__(self, max_length):
        super(Length_Decoder, self).__init__()
        self.pool = tf.keras.layers.MaxPool2D((2, 2))
        self.fc1 = tf.keras.layers.Dense(max_length * 16, activation='relu')
        self.fc2 = tf.keras.layers.Dense(max_length * 16, activation='relu')
        self.fc3 = tf.keras.layers.Dense(max_length * 4, activation='relu')
        self.fc4 = tf.keras.layers.Dense(max_length)

    def call(self, x, d_t=None, d_c=None):
        """Return logits of shape (batch_size, max_length).

        Args:
            x: encoder output of shape (batch_size, 256, channels); it is
                reshaped back to a 16x16 grid before pooling.
            d_t, d_c: optional duplicate-total / duplicate-current vectors. They
                are concatenated to the flattened features only when BOTH are given.
        """
        x = tf.reshape(x, (x.shape[0], 16, 16, x.shape[-1]))
        x = self.pool(x)  # shape = (batch_size, 8, 8, channels)
        x = self.fc1(x)   # dense layer acts on the channel axis
        x = tf.reshape(x, (x.shape[0], -1))
        # identity check: `!= None` on array-like arguments is an element-wise
        # comparison in some libraries and is not a reliable "was it passed?" test
        if d_t is not None and d_c is not None:
            d = tf.concat([tf.cast(d_t, 'float32'), tf.cast(d_c, 'float32')], axis=-1)
            x = tf.concat([d, x], axis=-1)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        # shape = (batch_size, max_length)
        return x

In [18]:
class RNN_Decoder(tf.keras.Model):
    """Attention-based decoder with three stacked GRUs, run one token at a time.

    Each call consumes one input token per sample plus the length and
    duplicate-count conditioning vectors, and returns logits for the next token
    together with the updated GRU states.
    `max_length` is accepted by the constructor but not used.
    """
    def __init__(self, embedding_dim, hidden_size, vocab_size, max_length):
        super(RNN_Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        # first and third GRU apply input dropout (0.3) while training; the second does not
        self.gru1 = tf.keras.layers.GRU(self.hidden_size, return_sequences=True,
                                        return_state=True, recurrent_initializer='glorot_uniform', dropout=0.3)
        self.gru2 = tf.keras.layers.GRU(self.hidden_size, return_sequences=True,
                                        return_state=True, recurrent_initializer='glorot_uniform')
        self.gru3 = tf.keras.layers.GRU(self.hidden_size, return_sequences=True,
                                        return_state=True, recurrent_initializer='glorot_uniform', dropout=0.3)
        self.fc1 = tf.keras.layers.Dense(hidden_size, activation='relu')
        self.fc2 = tf.keras.layers.Dense(vocab_size)

        self.attention = Bahdanau_Attention(hidden_size)

    def call(self, x, l, d_t, d_c, features, hidden, training=True):
        """Run one decoding step.

        Args:
            x: (batch_size, 1) ids of the current input token.
            l: (batch_size, n) length vector (one-hot or probabilities).
            d_t, d_c: duplicate-total / duplicate-current vectors.
            features: encoder output attended over.
            hidden: list of three GRU states, each (batch_size, hidden_size).
            training: forwarded to the GRUs (controls their dropout).

        Returns:
            (logits of shape (batch_size, vocab_size), [state1, state2, state3],
            attention_weights)
        """
        # the attention query is the first GRU's previous state concatenated with the length vector
        l = tf.cast(l, 'float32')
        hidden_0_with_length = tf.concat([l, hidden[0]], axis=-1)
        context_vector, attention_weights = self.attention(features, hidden_0_with_length)
        # give the conditioning vectors a length-1 time axis so they can be concatenated with GRU outputs
        l = tf.expand_dims(l, 1)
        d = tf.expand_dims(tf.concat([tf.cast(d_t, 'float32'), tf.cast(d_c, 'float32')], axis=-1), 1)

        # x shape before is (batch_size, 1) since it is passed through one by one at a time
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)
        # context_vector shape is (batch_size, feature_dim)
        # x shape after concatenation == (batch_size, 1, feature_dim + embedding_dim)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        # passing the concatenated vector to the GRU
        # x shape is (batch_size, 1, hidden_size)
        # state is new hidden used in next step
        x, state1 = self.gru1(x, initial_state = hidden[0], training=training)
        x_identity = tf.identity(x)
        # second GRU also sees the duplicate and length conditioning
        x = tf.concat([d, l, x], axis=-1)
        x, state2 = self.gru2(x, initial_state = hidden[1], training=training)
        x_identity2 = tf.identity(x)
        # residual connection: output of gru1 is added to the input of gru3
        x, state3 = self.gru3(x + x_identity, initial_state = hidden[2], training=training)
        # residual connection from gru2, then prepend the conditioning vectors again
        # x shape (batch_size, 1, len(d) + len(l) + hidden_size)
        x = tf.concat([d, l, x + x_identity2], axis=-1)
        # drop the length-1 time axis
        x = tf.reshape(x, (x.shape[0], -1))
        # x shape (batch_size, hidden_size)
        x = self.fc1(x)
        # x shape (batch_size, vocab_size)
        x = self.fc2(x)

        return x, [state1, state2, state3], attention_weights

    def reset_state(self, batch_size):
        """Return fresh all-zero states for the three GRUs, sized for `batch_size`."""
        # generate new hidden layer with different batch size
        return [tf.zeros((batch_size, self.hidden_size)) for _ in range(3)]

## Graph

In [19]:
# One independent Adam optimizer (default settings) per training objective:
# step-1 model, step-2 model, length head and duplicate-count head.
optimizer_step1, optimizer_step2, optimizer_length, optimizer_dups = (
    tf.keras.optimizers.Adam() for _ in range(4)
)

### Step 1

In [20]:
# Per-element cross entropy on raw logits; reduction is left to the callers.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
    reduction='none',
)

def loss_function(real, pred):
    """Mean sparse categorical cross entropy of logits `pred` against class ids `real`."""
    return tf.reduce_mean(loss_object(real, pred))

def accuracy_function(real, pred):
    """Fraction of samples whose arg-max class in `pred` equals the id in `real`."""
    hits = tf.math.argmax(pred, axis=-1) == real
    return tf.math.reduce_mean(tf.cast(hits, tf.float32))

In [21]:
@tf.function
def train_step1(glyph, target, length, total_dups, curr_dups):
    """One optimisation step of stage 1 (encoder + position-wise simple decoder).

    Also updates the length head and the duplicate-count head, each with its
    own optimizer, on the features computed for this batch.

    Args:
        glyph: batch of glyph images.
        target: token ids, column 0 being the start token.
        length: one-hot code length.
        total_dups, curr_dups: one-hot duplicate total / current index.

    Returns:
        (loss, accuracy), both averaged over the target positions after the start token.
    """
    loss = 0; accuracy = 0
    with tf.GradientTape() as tape:
        features = encoder(glyph)
        # predict every token after the start token independently from its position index
        for i in range(1, target.shape[1]):
            # position ids are 0-based: target column i is predicted from position i-1
            position = tf.convert_to_tensor(np.repeat(i-1, target.shape[0]), dtype='int64')
            prediction, weight = simple_decoder(features, position)
            loss += loss_function(target[:, i], prediction)
            accuracy += accuracy_function(target[:, i], prediction)

    # the token loss trains both the simple decoder and the encoder
    trainable_variables = simple_decoder.trainable_variables + encoder.trainable_variables
    gradients = tape.gradient(loss, trainable_variables)
    optimizer_step1.apply_gradients(zip(gradients, trainable_variables))

    # length head: gradients are taken w.r.t. the head's own variables only,
    # so the encoder is not updated by this loss
    with tf.GradientTape() as tape_length:
        length_pred = length_decoder(features, total_dups, curr_dups)
        # labels are one-hot; argmax converts them to class ids
        loss_length = loss_function(tf.math.argmax(length, axis=-1), length_pred)

    gradients_length = tape_length.gradient(loss_length, length_decoder.trainable_variables)
    optimizer_length.apply_gradients(zip(gradients_length, length_decoder.trainable_variables))

    # duplicate-count head: same scheme, predicting the total number of duplicates
    with tf.GradientTape() as tape_dups:
        dups_pred = dup_decoder(features)
        loss_dups = loss_function(tf.math.argmax(total_dups, axis=-1), dups_pred)

    gradients_dups = tape_dups.gradient(loss_dups, dup_decoder.trainable_variables)
    optimizer_dups.apply_gradients(zip(gradients_dups, dup_decoder.trainable_variables))

    return loss / (target.shape[1] - 1), accuracy / (target.shape[1] - 1)

In [22]:
@tf.function
def validation_step1(glyph, target):
    """Evaluate the stage-1 model on one batch without updating any weights.

    Returns (loss, accuracy), both averaged over the target positions that
    follow the start token.
    """
    n_positions = target.shape[1] - 1
    batch_size = target.shape[0]
    feature = encoder(glyph, training=False)

    loss = 0
    accuracy = 0
    for pos in range(n_positions):
        # position `pos` (0-based) predicts target column `pos + 1`
        position = tf.convert_to_tensor(np.repeat(pos, batch_size), dtype='int64')
        prediction, _ = simple_decoder(feature, position)
        expected = target[:, pos + 1]
        loss += loss_function(expected, prediction)
        accuracy += accuracy_function(expected, prediction)
    return loss / n_positions, accuracy / n_positions

In [23]:
def step1(epoch):
    """Run one stage-1 epoch: train, validate, save a checkpoint and print a summary.

    Args:
        epoch: zero-based epoch index (printed as epoch + 1).
    """
    start = time.time()
    total_loss = 0; val_loss = 0
    total_accuracy = 0; val_accuracy = 0
    train_batches = 0; val_batches = 0

    for (batch, (glyph_tensor, target, length, total_dups, curr_dups)) in enumerate(dataset):
        t_loss, accuracy = train_step1(glyph_tensor, target, length, total_dups, curr_dups)
        total_loss += t_loss
        total_accuracy += accuracy
        # `batch` is zero-based; the running average must divide by the number
        # of batches processed so far (avoids 0-division on the first batch)
        train_batches = batch + 1
        print('Epoch {}, Train Loss {:.4f}, Accuracy {:.2%}; progress {:.1%}, taken {:.0f} sec'.format(
            epoch + 1, total_loss / train_batches, total_accuracy / train_batches,
            train_batches / max(num_steps, 1), time.time() - start), end='\r')

    for (glyph_tensor, target, _, _, _) in val_dataset:
        t_loss, accuracy = validation_step1(glyph_tensor, target)
        val_loss += t_loss
        val_accuracy += accuracy
        val_batches += 1

    # save the weights reached at the end of this epoch
    ckpt_manager_step1.save()

    # average over the batches actually seen (guarded against empty datasets)
    train_div = max(train_batches, 1)
    val_div = max(val_batches, 1)
    print ('Epoch {}, Train Loss {:.4f}, Accuracy {:.2%} | Validation Loss {:.4f}, Accuracy {:.2%}; taken {:.0f} sec'.format(
        epoch+1, total_loss/train_div, total_accuracy/train_div, val_loss/val_div, val_accuracy/val_div, time.time() - start))

### Step 2

In [24]:
def predict(features, max_length, length, total_dups, curr_dups):
    """Greedily decode `max_length` tokens with the RNN decoder (no teacher forcing).

    Returns:
        dec_input: (batch_size, max_length + 1) int64 ids, column 0 being the start token 0.
        probability: (batch_size,) product of the chosen tokens' softmax probabilities.
    """
    batch_size = features.shape[0]
    # every sequence starts with token 0
    dec_input = tf.convert_to_tensor([[0]] * batch_size, dtype='int64')
    hidden = decoder.reset_state(batch_size=batch_size)
    probability = tf.convert_to_tensor([1] * batch_size, dtype='float32')

    for step in range(max_length):
        # feed back the token chosen at the previous step
        current_token = tf.expand_dims(dec_input[:, step], 1)
        prediction, hidden, attention_weights = decoder(
            current_token, length, total_dups, curr_dups, features, hidden, training=False)
        # arg-max (not sampling) keeps the result deterministic
        step_probs = tf.math.softmax(prediction, axis=-1)
        probability *= tf.math.reduce_max(step_probs, axis=-1)
        predicted_id = tf.math.argmax(prediction, axis=-1)
        dec_input = tf.concat([dec_input, tf.expand_dims(predicted_id, 1)], axis=1)
    return dec_input, probability

In [25]:
def predict_next(features, target, length, total_dups, curr_dups, training=True):
    """Teacher-forced decoding: predict each next token from the true previous one.

    Returns logits of shape (batch_size, target.shape[1] - 1, VOCAB); entry t
    is the prediction made after feeding target[:, t].
    """
    batch_size = features.shape[0]
    n_steps = target.shape[1] - 1
    hidden = decoder.reset_state(batch_size=batch_size)
    # dummy first slice so tf.concat has something to extend; removed on return
    predictions = tf.constant(0, dtype='float32', shape=(batch_size, 1, VOCAB))
    for step in range(n_steps):
        true_token = tf.expand_dims(target[:, step], 1)
        prediction, hidden, attention_weights = decoder(
            true_token, length, total_dups, curr_dups, features, hidden, training=training)
        predictions = tf.concat([predictions, tf.expand_dims(prediction, 1)], axis=1)
    return predictions[:, 1:, :]

In [26]:
def loss_function_step2(real, pred):
    """Sequence loss: per-position batch-mean cross entropy, summed over positions.

    Positions whose label is 0 are excluded via a mask.
    NOTE(review): 0 is the start token, which `tokenizer` only places in column 0;
    callers pass target[:, 1:], so the mask is presumably all ones in practice — confirm.
    """
    per_token = loss_object(real, pred)
    keep = tf.cast(tf.math.not_equal(real, 0), dtype=per_token.dtype)
    per_position = tf.reduce_mean(per_token * keep, axis=0)
    return tf.reduce_sum(per_position)

def accuracy_function_step2(real, pred):
    """Fraction of rows in which `pred` matches `real` at every position (exact-match accuracy)."""
    row_matches = tf.math.reduce_all(pred == real, 1)
    as_float = tf.cast(row_matches, tf.float32)
    return tf.math.reduce_mean(as_float)

In [27]:
@tf.function
def train_step2(glyph_tensor, target, length, total_dups, curr_dups):
    """One optimisation step of stage 2 (encoder + RNN decoder, teacher forcing).

    Also updates the length head and the duplicate-count head with their own
    optimizers, then measures whole-string accuracy with greedy decoding.

    Returns:
        (token loss averaged over positions, exact-match accuracy,
        length loss, length accuracy)
    """

    with tf.GradientTape() as tape:
        features = encoder(glyph_tensor)
        # teacher forcing with the ground-truth length and duplicate vectors
        predictions = predict_next(features, target, length, total_dups, curr_dups)
        loss = loss_function_step2(target[:, 1:], predictions)

    # the sequence loss trains both the RNN decoder and the encoder
    trainable_variables = decoder.trainable_variables + encoder.trainable_variables
    gradients = tape.gradient(loss, trainable_variables)
    optimizer_step2.apply_gradients(zip(gradients, trainable_variables))

    # length head: gradients are taken w.r.t. the head's own variables only
    with tf.GradientTape() as tape_length:
        length_pred = length_decoder(features, d_t=total_dups, d_c=curr_dups)
        # labels are one-hot; argmax converts them to class ids
        loss_length = loss_function(tf.math.argmax(length, axis=-1), length_pred)

    gradients_length = tape_length.gradient(loss_length, length_decoder.trainable_variables)
    optimizer_length.apply_gradients(zip(gradients_length, length_decoder.trainable_variables))

    # duplicate-count head: same scheme
    with tf.GradientTape() as tape_dups:
        dups_pred = dup_decoder(features)
        loss_dups = loss_function(tf.math.argmax(total_dups, axis=-1), dups_pred)

    gradients_dups = tape_dups.gradient(loss_dups, dup_decoder.trainable_variables)
    optimizer_dups.apply_gradients(zip(gradients_dups, dup_decoder.trainable_variables))

    # calculate accuracy based on the code's whole string
    # (greedy decoding conditioned on the PREDICTED length distribution, not the label)
    predictions_id, _ = predict(features, MAX_LEN, tf.nn.softmax(length_pred, axis=-1), total_dups, curr_dups)
    accuracy = accuracy_function_step2(predictions_id, target)
    accuracy_length = accuracy_function(tf.math.argmax(length, axis=-1), length_pred)

    return loss / (target.shape[1] - 1), accuracy, loss_length, accuracy_length

In [28]:
@tf.function
def validation_step2(glyph_tensor, target, length, total_dups, curr_dups):
    """Evaluate the stage-2 model on one batch without updating any weights.

    Unlike training, the duplicate-total input is the dup head's own softmax
    prediction rather than the label; `total_dups` is accepted but not used.

    Returns:
        (token loss averaged over positions, exact-match accuracy,
        length loss, length accuracy)
    """
    features = encoder(glyph_tensor, training=False)
    # predicted distribution over the number of duplicates replaces the label
    dups_pred = tf.nn.softmax(dup_decoder(features), axis=-1)
    # teacher-forced loss still uses the ground-truth length
    predictions = predict_next(features, target, length, dups_pred, curr_dups, training=False)
    loss = loss_function_step2(target[:, 1:], predictions)
    length_pred = length_decoder(features, d_t=dups_pred, d_c=curr_dups)

    # calculate accuracy based on the code's whole string
    # (greedy decoding conditioned on the predicted length distribution)
    predictions_id, _ = predict(features, MAX_LEN, tf.nn.softmax(length_pred, axis=-1), dups_pred, curr_dups)
    accuracy = accuracy_function_step2(predictions_id, target)
    loss_length = loss_function(tf.math.argmax(length, axis=-1), length_pred)
    accuracy_length = accuracy_function(tf.math.argmax(length, axis=-1), length_pred)

    return loss / (target.shape[1] - 1), accuracy, loss_length, accuracy_length

In [29]:
def step2(epoch):
    """Run one stage-2 epoch: train, validate, log to TensorBoard, checkpoint, print.

    Args:
        epoch: zero-based epoch index (used as the summary step, printed as epoch + 1).
    """
    start = time.time()
    total_loss = 0; val_loss = 0; len_loss = 0; val_len_loss = 0
    total_accuracy = 0; val_accuracy = 0; len_accu = 0; val_len_accu = 0
    train_batches = 0; val_batches = 0

    for (batch, (glyph_tensor, target, length, total_dups, curr_dups)) in enumerate(dataset):
#        if batch == 0:
#            tf.summary.trace_on(graph=True, profiler=True)
        t_loss, accuracy, loss_length, accuracy_length = train_step2(glyph_tensor, target, length, total_dups, curr_dups)
#        if batch == 0:
#            with graph_summary_writer.as_default():
#                tf.summary.trace_export(name="train_trace", step=epoch, profiler_outdir=graph_log_dir)
#            tf.summary.trace_off()
        total_loss += t_loss; total_accuracy += accuracy
        len_loss += loss_length; len_accu += accuracy_length
        # `batch` is zero-based; the running average must divide by the number
        # of batches processed so far (avoids 0-division on the first batch)
        train_batches = batch + 1

        print('Epoch {}, Train Loss {:.4f}, Accuracy {:.2%}; Length Loss {:.4f}, Accuracy {:.2%}; progress {:.1%}, taken {:.0f} sec'.format(
            epoch + 1, total_loss/train_batches, total_accuracy/train_batches, len_loss/train_batches, len_accu/train_batches,
            train_batches/max(num_steps, 1), time.time() - start), end='\r')

    for (glyph_tensor, target, length, total_dups, curr_dups) in val_dataset:
        t_loss, accuracy, loss_length, accuracy_length = validation_step2(glyph_tensor, target, length, total_dups, curr_dups)
        val_loss += t_loss; val_accuracy += accuracy
        val_len_loss += loss_length; val_len_accu += accuracy_length
        val_batches += 1

    # average over the batches actually seen (guarded against empty datasets)
    train_div = max(train_batches, 1)
    val_div = max(val_batches, 1)

    # store the epoch-end metrics so they can be plotted later
    with train_summary_writer.as_default():
        tf.summary.scalar('loss', (total_loss / train_div), step=epoch)
        tf.summary.scalar('accuracy', (total_accuracy / train_div), step=epoch)
        tf.summary.scalar('length_loss', (len_loss / train_div), step=epoch)
        tf.summary.scalar('length_accuracy', (len_accu / train_div), step=epoch)
    with test_summary_writer.as_default():
        tf.summary.scalar('loss', val_loss / val_div, step=epoch)
        tf.summary.scalar('accuracy', val_accuracy / val_div, step=epoch)
        tf.summary.scalar('length_loss', val_len_loss / val_div, step=epoch)
        tf.summary.scalar('length_accuracy', val_len_accu / val_div, step=epoch)

    ckpt_manager_step2.save()

    print('Epoch {}, Train Loss {:.4f}, Accuracy {:.2%}; Length Loss {:.4f}, Accuracy {:.2%} | Validation Loss {:.4f}, Accuracy {:.2%}, Length Loss {:.4f}, Accuracy {:.2%}, taken {:.0f} sec'.format(
        epoch + 1, total_loss/train_div, total_accuracy/train_div, len_loss/train_div, len_accu/train_div,
        val_loss/val_div, val_accuracy/val_div, val_len_loss/val_div, val_len_accu/val_div, time.time() - start))

## Training

In [30]:
# Shared image encoder.
encoder = CNN_Encoder(embedding_dim=EBD_DIM)
# Stage-1 decoder (position-wise, no recurrence).
simple_decoder = Simple_Decoder(
    embedding_dim=EBD_DIM,
    max_length=MAX_LEN,
    hidden_size=UNIT_DIM,
    vocab_size=VOCAB,
)
# Auxiliary heads: code length and number of duplicate codes (same architecture).
length_decoder = Length_Decoder(max_length=MAX_LEN)
dup_decoder = Length_Decoder(max_length=MAX_DUP)
# Stage-2 decoder (attention + stacked GRUs).
decoder = RNN_Decoder(
    embedding_dim=EBD_DIM,
    hidden_size=UNIT_DIM,
    max_length=MAX_LEN,
    vocab_size=VOCAB,
)

In [31]:
# Stage-1 checkpoint: encoder, simple decoder, both auxiliary heads and their optimizers.
checkpoint_path_step1 = './checkpoints/train_step1'
ckpt_step1 = tf.train.Checkpoint(
    encoder=encoder,
    decoder=simple_decoder,
    length_decoder=length_decoder,
    dup_decoder=dup_decoder,
    optimizer=optimizer_step1,
    optimizer_length=optimizer_length,
    optimizer_dups=optimizer_dups,
)
# only the 5 most recent checkpoints are kept on disk
ckpt_manager_step1 = tf.train.CheckpointManager(
    ckpt_step1, checkpoint_path_step1, max_to_keep=5)

In [32]:
# Stage-2 checkpoint: encoder, RNN decoder, both auxiliary heads and their optimizers.
checkpoint_path_step2 = "./checkpoints/train_step2"
ckpt_step2 = tf.train.Checkpoint(
    encoder=encoder,
    decoder=decoder,
    length_decoder=length_decoder,
    dup_decoder=dup_decoder,
    optimizer=optimizer_step2,
    optimizer_length=optimizer_length,
    optimizer_dups=optimizer_dups,
)
# only the 5 most recent checkpoints are kept on disk
ckpt_manager_step2 = tf.train.CheckpointManager(
    ckpt_step2, checkpoint_path_step2, max_to_keep=5)

In [33]:
# TensorBoard writers; each run logs under its own timestamped directory.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = ''.join(['logs/gradient_tape/', current_time, '/train'])
test_log_dir = ''.join(['logs/gradient_tape/', current_time, '/test'])
#graph_log_dir = 'logs/gradient_tape/' + current_time + '/func'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)
#graph_summary_writer = tf.summary.create_file_writer(graph_log_dir)

### Step 1

In [34]:
EPOCHS_STEP1 = 20

# Resume from the newest stage-1 checkpoint if there is one. One checkpoint is
# saved per epoch, so the numeric suffix after the last '-' in the checkpoint
# path is taken as the number of epochs already completed.
if ckpt_manager_step1.latest_checkpoint:
    epoch_step1 = int(ckpt_manager_step1.latest_checkpoint.rsplit('-', 1)[-1])
    ckpt_step1.restore(ckpt_manager_step1.latest_checkpoint)
else:
    epoch_step1 = 0

In [35]:
# Train stage 1 until EPOCHS_STEP1 epochs are done. The counter is advanced only
# after an epoch completes, so re-running this cell after an interruption
# continues with the unfinished epoch.
while epoch_step1 < EPOCHS_STEP1:
    step1(epoch_step1)
    epoch_step1 += 1

### Step 2

In [36]:
EPOCHS_STEP2 = 120

# Resume from the newest stage-2 checkpoint if there is one. One checkpoint is
# saved per epoch, so the numeric suffix after the last '-' in the checkpoint
# path is taken as the number of epochs already completed.
if ckpt_manager_step2.latest_checkpoint:
    epoch_step2 = int(ckpt_manager_step2.latest_checkpoint.rsplit('-', 1)[-1])
    ckpt_step2.restore(ckpt_manager_step2.latest_checkpoint)
else:
    epoch_step2 = 0

In [37]:
# Train stage 2 until EPOCHS_STEP2 epochs are done. The counter is advanced only
# after an epoch completes, so re-running this cell after an interruption
# continues with the unfinished epoch.
while epoch_step2 < EPOCHS_STEP2:
    step2(epoch_step2)
    epoch_step2 += 1

## Testing

In [38]:
@tf.function
def test(glyph):
    """Decode every candidate code for a batch of glyph images.

    The dup head predicts how many codes each character has; the decoder is
    then run once per duplicate index, up to the largest predicted count in
    the batch.

    Returns:
        results: (batch, n_candidates, MAX_LEN + 1) int64 token ids.
        probs: (batch, n_candidates) probability of each decoded sequence.
        dups_dict: (batch,) zero-based arg-max of the predicted duplicate count;
            candidates with index > dups_dict[i] are surplus for sample i.
    """
    features = encoder(glyph, training=False)
    total_dups = tf.nn.softmax(dup_decoder(features), axis=-1)
    dups_dict = tf.math.argmax(total_dups, axis=-1)
    # number of decoding passes needed to cover the sample with the most duplicates
    max_dup = tf.math.reduce_max(dups_dict) + 1

    # dummy first slices so tf.concat has something to extend; removed on return
    results = tf.zeros((glyph.shape[0], 1, MAX_LEN + 1), dtype='int64')
    probs = tf.zeros((glyph.shape[0], 1), dtype='float32')
    # rows of the identity matrix serve as one-hot vectors for the current duplicate index
    identity_matrix = tf.convert_to_tensor(np.identity(MAX_DUP), dtype='int64')
    # max_dup is a tensor, so autograph turns this into a graph-level loop
    for i in range(max_dup):

        # results/probs grow along axis 1 each iteration; declare that axis as variable
        tf.autograph.experimental.set_loop_options(
            shape_invariants=[(results, tf.TensorShape([glyph.shape[0], None, MAX_LEN + 1])), 
                             (probs, tf.TensorShape([glyph.shape[0], None]))])

        # clamp the duplicate index to each sample's own predicted maximum
        curr_dups = tf.math.minimum(tf.math.argmax(total_dups, axis=-1), i)
        curr_dups = tf.nn.embedding_lookup(identity_matrix, curr_dups)
        length = tf.nn.softmax(length_decoder(features, total_dups, curr_dups), axis=-1)
        test_result, prob = predict(features, MAX_LEN, length, total_dups, curr_dups)
        results = tf.concat([results, tf.expand_dims(test_result, axis=1)], axis=1)
        probs = tf.concat([probs, tf.expand_dims(prob, axis=1)], axis=1)
    return results[:, 1:, :], probs[:, 1:], dups_dict

In [39]:
def evaluate(word):
    """Predict the code(s) of every character in `word`.

    Args:
        word: string of characters to decode.

    Returns:
        One list per character, each containing [code, probability] pairs —
        one pair per predicted duplicate of that character. An empty `word`
        yields an empty list.

    Raises:
        ValueError: if a character cannot be rendered by `glyphbook`.
    """
    # nothing to decode; avoid feeding a zero-sized batch to the model
    if len(word) == 0:
        return []

    test_input = []
    for char in word:
        glyph = glyphbook.draw(char)
        if glyph is None:
            raise ValueError('Character {} unsupported.'.format(char))
        test_input.append(glyph)
    # add the trailing channel axis expected by the encoder
    test_input = np.expand_dims(test_input, -1)

    def decode(indexes):
        """Turn token ids into letters: skip ids <= 0, stop at the end token (>= 27)."""
        code = ''
        for i in indexes:
            if i <= 0:
                continue
            elif i >= 27:
                break
            else:
                code += chr(i + 96)
        return code

    results, probs, dups_dict = test(test_input)
    # convert all outputs to numpy once instead of indexing tensors element by element
    results = results.numpy()
    probs = probs.numpy()
    dups_dict = dups_dict.numpy()

    final_result = []
    for i in range(results.shape[0]):
        candidates = []
        for j in range(results.shape[1]):
            # keep only as many candidates as were predicted for this character
            if j <= dups_dict[i]:
                candidates.append([decode(results[i, j, :]), probs[i, j]])
        final_result.append(candidates)

    return final_result

In [40]:
# Smoke test on a sample string: the result holds, for every character, its list of
# [predicted code, probability] candidates.
evaluate('日月金木水火土的戈十大中一弓人心手口尸廿山女田止卜片')

[[['au', 0.70068336]],
 [['b', 0.9959378]],
 [['c', 0.73717195]],
 [['dd', 0.2912861]],
 [['e', 0.99911267]],
 [['f', 0.87351245]],
 [['g', 0.9789459]],
 [['hap', 0.99752855]],
 [['ij', 0.531378]],
 [['j', 0.89204115]],
 [['k', 0.98927003]],
 [['l', 0.6595829]],
 [['mm', 0.93622696]],
 [['n', 0.9959586]],
 [['o', 0.89679116]],
 [['p', 0.98779416]],
 [['q', 0.96840435]],
 [['av', 0.3311546]],
 [['sh', 0.60348976]],
 [['t', 0.9919883]],
 [['u', 0.9765548]],
 [['v', 0.9550942]],
 [['w', 0.6679651]],
 [['x', 0.9933106]],
 [['y', 0.99239075]],
 [['llml', 0.90976393], ['llms', 0.7984418]]]