In [1]:
from PIL import ImageFont, ImageDraw, Image
from fontTools.ttLib import TTFont

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

import time
import datetime
import multiprocessing

In [2]:
# Model hyper-parameters.
VOCAB = 28  # token ids 0..27: 0 = start, 1-26 = a-z, 27 = end/padding
EBD_DIM = 256
UNIT_DIM = 256
BATCH_SIZE_PER_REPLICA = 128

# Data-parallel training over the devices MirroredStrategy discovers.
strategy = tf.distribute.MirroredStrategy()
REPLICA_SIZE = strategy.num_replicas_in_sync
# Global batch size: every replica receives BATCH_SIZE_PER_REPLICA samples.
BATCH_SIZE = BATCH_SIZE_PER_REPLICA * REPLICA_SIZE

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


## Load Data

In [3]:
class Glyph(object):
    """Render single characters as square grayscale bitmaps.

    Several font files can be supplied; a character is drawn with the first
    font whose Unicode cmap contains its code point. Rendered bitmaps are
    memoised per character.
    """

    def __init__(self, fonts, size=64):
        """Load the fonts and index the code points each of them covers.

        Args:
            fonts: sequence of font file paths, in lookup-priority order.
            size: side length in pixels of the bitmaps returned by `draw`.
        """
        # One code point set per font, in the same order as `fonts`.
        self.codepoints = [set() for _ in fonts]
        # Glyphs are rendered at 90% of the canvas size and padded.
        self.size = int(size * 0.9)
        self.size_img = size
        self.pad = (size - self.size) // 2
        self.fonts = [ImageFont.truetype(f, self.size) for f in fonts]
        # Cache of already rendered characters: {character: bitmap}.
        self.cache = {}
        for cp, path in zip(self.codepoints, fonts):
            ttfont = TTFont(path)
            try:
                # Collect the code points of every Unicode cmap subtable.
                for cmap in ttfont['cmap'].tables:
                    if cmap.isUnicode():
                        cp.update(cmap.cmap)
            finally:
                # Only the code points are kept; release the font file handle.
                ttfont.close()

    def draw(self, ch):
        """Return the bitmap of `ch`, or None when no loaded font covers it.

        Args:
            ch: a single character.

        Returns:
            A float32 array of shape (size, size) with values in [0, 1]
            (0 = background, 1 = ink), or None if the character is missing
            from every font. Successful renders are cached.
        """
        if ch in self.cache:
            return self.cache[ch]

        # Search among the fonts and use the first one that has the character.
        font = next(
            (f for cps, f in zip(self.codepoints, self.fonts) if ord(ch) in cps),
            None)
        if font is None:
            return None

        img = Image.new('L', (self.size_img, self.size_img), 0)
        draw = ImageDraw.Draw(img)
        (width, baseline), (offset_x, offset_y) = font.font.getsize(ch)
        # NOTE(review): the extra 4 px vertical shift looks like a manual
        # centring tweak for these fonts - confirm before changing fonts/size.
        draw.text((self.pad - offset_x, self.pad - offset_y + 4), ch, font=font, fill=255, stroke_fill=255)
        # Convert the 8-bit image buffer directly instead of going through a
        # Python sequence of pixels, then scale to [0, 1].
        img_array = np.asarray(img, dtype='float32').reshape((self.size_img, self.size_img)) / 255
        self.cache[ch] = img_array

        return img_array

In [4]:
# Shared renderer used by `_mapping`; fonts are tried in the order listed.
glyphbook = Glyph(
    ['data/fonts/TH-Ming-HP0.ttf', 'data/fonts/TH-Ming-P2.ttf'],
    size=64,
)

def _mapping(item):
    """Render the character of one chart row.

    Args:
        item: a (char, code, dup_total, dup_curr) row.

    Returns:
        (bitmap, code, dup_total, dup_curr), or None when `glyphbook` cannot
        draw the character.
    """
    char, code, dup_total, dup_curr = item
    bitmap = glyphbook.draw(char)
    if bitmap is None:
        return None
    return bitmap, code, dup_total, dup_curr

In [5]:
def preprocess_chart(chart, cores=multiprocessing.cpu_count()):
    """Render every row of the chart and collect the results as arrays.

    Rows whose character cannot be rendered (``_mapping`` returns None) are
    dropped.

    Args:
        chart: object exposing ``.values`` as a 2-D array whose rows are
            (char, code, dup_total, dup_curr), e.g. a pandas DataFrame.
        cores: number of worker processes. A value <= 0 renders in the
            current process.

    Returns:
        Tuple ``(glyphs, codes, dup_total, dup_curr)`` of NumPy arrays;
        ``glyphs`` gets a trailing channel axis of size 1.
    """
    rows = chart.values
    if cores > 0:
        with multiprocessing.Pool(processes=cores) as pool:
            mapped = pool.map(_mapping, rows)
    else:
        # A plain loop is required here: np.apply_along_axis needs a function
        # returning same-shaped arrays, while _mapping returns either None or
        # a heterogeneous (bitmap, code, int, int) tuple.
        mapped = [_mapping(row) for row in rows]

    kept = [item for item in mapped if item is not None]
    glyphs = [item[0] for item in kept]
    codes = [item[1] for item in kept]
    dup_total = [item[2] for item in kept]
    dup_curr = [item[3] for item in kept]
    return np.expand_dims(np.array(glyphs), -1), np.array(codes), np.array(dup_total), np.array(dup_curr)

In [6]:
def tokenizer(code_table, max_len=5, pad_token=27):
    """Convert Cangjie code strings into fixed-width integer token rows.

    Each row starts with the start token 0, followed by one token per letter
    (a-z map to 1-26), and is filled up to ``max_len + 1`` entries with
    ``pad_token`` (which doubles as the end token).

    Args:
        code_table: 1-D sequence of code strings made of the letters a-z.
        max_len: maximum number of letters in a code.
        pad_token: id used for end-of-code and padding.

    Returns:
        int64 array of shape ``(len(code_table), max_len + 1)``.

    Raises:
        ValueError: if a code is longer than ``max_len``.
    """
    codes = np.asarray(code_table)
    tokens = np.full((len(codes), max_len + 1), pad_token, dtype='int64')
    tokens[:, 0] = 0  # start token
    for row, code in enumerate(codes):
        if len(code) > max_len:
            raise ValueError(
                'code {!r} is longer than max_len={}'.format(str(code), max_len))
        if len(code) == 0:
            continue
        # ord('a') == 97, so a-z become 1-26.
        tokens[row, 1:len(code) + 1] = [ord(ch) - 96 for ch in code]
    return tokens

In [7]:
# Tab-separated "character<TAB>code" table without a header row.
# keep_default_na=False keeps every field as the literal text read from the
# file instead of turning strings such as "NA" into missing values.
code_chart = pd.read_csv(
    'data/cangjie6.txt',
    sep='\t',
    header=None,
    names=['Char', 'Code'],
    keep_default_na=False,
)

In [8]:
# Map every character to the list of all codes registered for it.
count = {}
for char, code in code_chart.values:
    count.setdefault(char, []).append(code)

# Order each list by (length, alphabetical) once, after all codes are known,
# instead of re-sorting twice on every insertion.
for _codes in count.values():
    _codes.sort(key=lambda x: (len(x), x))

In [9]:
# Length of the longest code in the chart.
MAX_LEN = code_chart['Code'].map(len).max()
# Largest number of distinct codes registered for a single character.
MAX_DUP = max(len(codes_of_char) for codes_of_char in count.values())

In [10]:
# How many codes the row's character has in total.
code_chart['DuplicateTotal'] = code_chart['Char'].map(lambda ch: len(count[ch]))
# 1-based position of the row's code within its character's sorted code list.
code_chart['DuplicateCurrent'] = [
    count[ch].index(cd) + 1
    for ch, cd in zip(code_chart['Char'], code_chart['Code'])
]

In [11]:
glyphs, codes, dups_total, dups_curr = preprocess_chart(code_chart)
tokens = tokenizer(codes)

# Code length = number of letter tokens per row (ids strictly between the
# start token 0 and the end/padding token VOCAB - 1).
lengths = np.count_nonzero((tokens > 0) & (tokens < VOCAB - 1), axis=1)

# One-hot encode the 1-based values by selecting rows of a single identity
# matrix (built once per encoding rather than once per sample).
lengths = np.eye(MAX_LEN, dtype='int64')[lengths - 1]
dups_total = np.eye(MAX_DUP, dtype='int64')[np.asarray(dups_total, dtype='int64') - 1]
dups_curr = np.eye(MAX_DUP, dtype='int64')[np.asarray(dups_curr, dtype='int64') - 1]
del code_chart, codes, count

In [12]:
# Hold out 10% of the samples for validation. All arrays are split with the
# same permutation; the fixed random_state makes the split repeatable.
(
    train_glyphs, validation_glyphs,
    train_tokens, validation_tokens,
    train_lengths, validation_lengths,
    train_dups_total, validation_dups_total,
    train_dups_curr, validation_dups_curr,
) = train_test_split(
    glyphs, tokens, lengths, dups_total, dups_curr,
    test_size=0.1,
    random_state=902,
)
# The unsplit arrays are no longer needed.
del glyphs, tokens, lengths, dups_total, dups_curr

In [13]:
def _to_distributed_dataset(*arrays):
    """Build a shuffled, batched, prefetched dataset distributed over `strategy`.

    The arrays are sliced together along their first axis; the shuffle buffer
    covers the whole set and batches hold BATCH_SIZE samples.
    """
    ds = tf.data.Dataset.from_tensor_slices(arrays)
    ds = ds.shuffle(arrays[0].shape[0]).batch(BATCH_SIZE)
    ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return strategy.experimental_distribute_dataset(ds)


num_samples = train_glyphs.shape[0]
num_samples_val = validation_glyphs.shape[0]

dataset = _to_distributed_dataset(
    train_glyphs, train_tokens, train_lengths, train_dups_total, train_dups_curr)
val_dataset = _to_distributed_dataset(
    validation_glyphs, validation_tokens, validation_lengths, validation_dups_total, validation_dups_curr)

# Drop the NumPy copies; only the datasets are used from here on.
del train_glyphs, validation_glyphs, train_tokens, validation_tokens, train_lengths, validation_lengths
del train_dups_total, validation_dups_total, train_dups_curr, validation_dups_curr

## Model

$$\mathrm{Smooth\ ReLU}(x;\alpha):=\frac{1}{2}\left(\log\left(e^{2\alpha x}+e^{2x}\right)-\log 2\right)$$

In [14]:
class Smooth_ReLU(tf.keras.layers.Layer):
    """Smooth ReLU activation with a learnable parameter ``alpha``.

    Computes, element-wise,
    ``0.5 * (log(exp(2 * alpha * x) + exp(2 * x)) - log(2))``.

    Args:
        alpha_initializer: initializer for ``alpha``.
        alpha_regularizer: optional regularizer for ``alpha``.
        alpha_constraint: optional constraint for ``alpha``.
        shared_axes: axis or axes (1-based, batch axis excluded) along which
            a single ``alpha`` is shared.
    """

    def __init__(self,
                 alpha_initializer='zeros',
                 alpha_regularizer=None,
                 alpha_constraint=None,
                 shared_axes=None,
                 **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.alpha_initializer = tf.keras.initializers.get(alpha_initializer)
        self.alpha_regularizer = tf.keras.regularizers.get(alpha_regularizer)
        self.alpha_constraint = tf.keras.constraints.get(alpha_constraint)
        # Normalise shared_axes to None or a list of axes.
        if shared_axes is None:
            self.shared_axes = None
        elif isinstance(shared_axes, (list, tuple)):
            self.shared_axes = list(shared_axes)
        else:
            self.shared_axes = [shared_axes]

    def build(self, input_shape):
        """Create ``alpha`` with the input's non-batch shape (1 on shared axes)."""
        shared = self.shared_axes or []
        param_shape = list(input_shape[1:])
        for axis in shared:
            param_shape[axis - 1] = 1
        self.alpha = self.add_weight(
            shape=param_shape,
            name='alpha',
            initializer=self.alpha_initializer,
            regularizer=self.alpha_regularizer,
            constraint=self.alpha_constraint)
        # Pin the size of every non-shared axis in the input spec.
        axes = {}
        if shared:
            axes = {
                axis: input_shape[axis]
                for axis in range(1, len(input_shape))
                if axis not in shared
            }
        self.input_spec = tf.keras.layers.InputSpec(ndim=len(input_shape), axes=axes)
        self.built = True

    def call(self, inputs):
        doubled = 2 * inputs
        # log(exp(2 * alpha * x) + exp(2 * x)), computed stably.
        log_sum = tf.math.reduce_logsumexp([self.alpha * doubled, doubled], axis=0)
        return 0.5 * (log_sum - tf.math.log(2.0))

    def get_config(self):
        own_config = {
            'alpha_initializer': tf.keras.initializers.serialize(self.alpha_initializer),
            'alpha_regularizer': tf.keras.regularizers.serialize(self.alpha_regularizer),
            'alpha_constraint': tf.keras.constraints.serialize(self.alpha_constraint),
            'shared_axes': self.shared_axes
        }
        return {**super().get_config(), **own_config}

    def compute_output_shape(self, input_shape):
        # Element-wise activation: the shape is unchanged.
        return input_shape

In [15]:
class Res_CNN(tf.keras.layers.Layer):
    """Three same-padded convolutions with two skip connections.

    The output of the first convolution is added to the input of the third,
    and the output of the second is added before batch normalisation and the
    Smooth_ReLU activation.

    Args:
        feature_dim: number of filters of every convolution.
        kernel_size: kernel size of every convolution.
    """

    def __init__(self, feature_dim, kernel_size, **kwargs):
        super().__init__(**kwargs)
        self.feature_dim = feature_dim
        self.kernel_size = kernel_size

    def call(self, x):
        # NOTE(review): `training` is not forwarded explicitly to the batch
        # normalisation; presumably Keras propagates it from the caller - confirm.
        first = self.cnn1(x)
        second = self.cnn2(first)
        third = self.cnn3(second + first)
        normalised = self.norm(third + second)
        return self.srelu(normalised)

    def build(self, input_shape):
        self.input_spec = tf.keras.layers.InputSpec(ndim=len(input_shape))
        conv_kwargs = dict(padding='same')
        self.cnn1 = tf.keras.layers.Convolution2D(self.feature_dim, self.kernel_size, **conv_kwargs)
        self.cnn2 = tf.keras.layers.Convolution2D(self.feature_dim, self.kernel_size, **conv_kwargs)
        self.cnn3 = tf.keras.layers.Convolution2D(self.feature_dim, self.kernel_size, **conv_kwargs)
        self.norm = tf.keras.layers.BatchNormalization()
        # One alpha per channel, shared over axes 1 and 2.
        self.srelu = Smooth_ReLU(
            alpha_initializer='lecun_normal',
            alpha_constraint=tf.keras.constraints.max_norm(max_value=0.5, axis=[]),
            shared_axes=[1, 2])
        self.built = True

    def get_config(self):
        own_config = {
            'feature_dim': self.feature_dim,
            'kernel_size': self.kernel_size
        }
        return {**super().get_config(), **own_config}

    def compute_output_shape(self, input_shape):
        # Same-padded convolutions: only the channel axis changes.
        leading = input_shape[:-1]
        return leading + [self.feature_dim]

In [16]:
class CNN_Encoder(tf.keras.layers.Layer):
    """Convolutional encoder: three Res_CNN stages with two 2x2 max-poolings.

    The channel count grows from ``embedding_dim // 16`` over
    ``embedding_dim // 4`` to ``embedding_dim`` while both spatial axes are
    halved twice.

    Args:
        embedding_dim: number of channels of the output feature map.
    """

    def __init__(self, embedding_dim, **kwargs):
        super().__init__(**kwargs)
        self.embedding_dim = embedding_dim

    def call(self, x, training=True):
        # Stage 1: channels -> embedding_dim // 16, then spatial size halved.
        stage1 = self.pool1(self.res_cnn1(x))
        # Stage 2: channels -> embedding_dim // 4, then spatial size halved.
        stage2 = self.pool2(self.res_cnn2(stage1))
        # Stage 3: channels -> embedding_dim, spatial size kept.
        features = self.res_cnn3(stage2)

        # Dropout is applied to the final feature map in training mode only.
        if training:
            features = tf.nn.dropout(features, rate=0.5)
        return features

    def build(self, input_shape):
        self.input_spec = tf.keras.layers.InputSpec(ndim=len(input_shape))
        kernel = (3, 3)
        window = (2, 2)
        self.res_cnn1 = Res_CNN(self.embedding_dim // 16, kernel)
        self.pool1 = tf.keras.layers.MaxPool2D(window)
        self.res_cnn2 = Res_CNN(self.embedding_dim // 4, kernel)
        self.pool2 = tf.keras.layers.MaxPool2D(window)
        self.res_cnn3 = Res_CNN(self.embedding_dim, kernel)
        self.built = True

    def get_config(self):
        own_config = {
            'embedding_dim': self.embedding_dim,
        }
        return {**super().get_config(), **own_config}

    def compute_output_shape(self, input_shape):
        # Two 2x2 poolings divide height and width by 4.
        height, width = input_shape[-3], input_shape[-2]
        return input_shape[:-3] + [height // 4, width // 4, self.embedding_dim]

In [17]:
class Bahdanau_Attention(tf.keras.layers.Layer):
    """Additive (Bahdanau-style) attention over a feature map.

    Args:
        attention_dim: width of the hidden projection used to score positions.
    """

    def __init__(self, attention_dim, **kwargs):
        super(Bahdanau_Attention, self).__init__(**kwargs)
        self.attention_dim = attention_dim

    def call(self, inputs):
        """Attend over `features` using `hidden` as the query.

        Args:
            inputs: ``[features, hidden]``. All axes of `features` between
                the batch axis and the channel axis are flattened into one
                "positions" axis; `hidden` is (batch_size, hidden_size).

        Returns:
            ``(context_vector, attention_weights)`` with shapes
            (batch_size, channels) and (batch_size, positions, 1).
        """
        features, hidden = inputs
        # features shape after reshape == (batch_size, positions, channels)
        features = tf.reshape(features, [tf.shape(features)[0], -1, features.shape[-1]])

        # hidden shape == (batch_size, hidden_size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden_size)
        hidden_with_time_axis = tf.expand_dims(hidden, 1)

        # score shape == (batch_size, positions, attention_dim)
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))

        # attention_weights shape == (batch_size, positions, 1)
        # the last axis is 1 because self.V projects every score to a scalar;
        # the softmax normalises over the positions axis
        attention_weights = tf.nn.softmax(self.V(score), axis=1)

        # context_vector shape after sum == (batch_size, channels)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

    def build(self, input_shape):
        self.W1 = tf.keras.layers.Dense(self.attention_dim)
        self.W2 = tf.keras.layers.Dense(self.attention_dim)
        self.V = tf.keras.layers.Dense(1)
        self.built = True

    def get_config(self):
        config = {
            'attention_dim': self.attention_dim,
        }
        base_config = super(Bahdanau_Attention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        """Return the shape of the context vector: [batch_size, channels].

        `input_shape` is the list ``[features_shape, hidden_shape]`` matching
        the two inputs of `call`; only the context vector (first output) is
        described, the attention weights are not.
        """
        features_shape = input_shape[0]
        return [features_shape[0], features_shape[-1]]

In [18]:
class Simple_Decoder(tf.keras.layers.Layer):
    """Position-conditioned decoder: attention followed by two dense layers.

    The embedding of the requested output position is used as the attention
    query over the encoder features.

    Args:
        embedding_dim: size of the position embedding and attention projection.
        max_length: stored and serialised in the config; not used by `call`.
        hidden_size: width of the hidden dense layer.
        vocab_size: number of output classes (also the embedding table size).
    """

    def __init__(self, embedding_dim, max_length, hidden_size, vocab_size, **kwargs):
        super(Simple_Decoder, self).__init__(**kwargs)
        self.embedding_dim = embedding_dim
        self.max_length = max_length
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size

    def call(self, inputs):
        """Predict the token at `position`.

        Args:
            inputs: ``[features, position]`` - the encoder feature map and an
                integer position index per sample.

        Returns:
            ``(logits, attention_weights)``; logits are (batch_size, vocab_size).
        """
        features, position = inputs
        # y shape (batch_size, embedding_dim)
        y = self.embedding(position)
        # x is the attention context vector, shape (batch_size, channels of features)
        x, w = self.attention([features, y])
        # x shape (batch_size, hidden_size)
        x = self.fc1(x)
        x = self.srelu(x)
        # x shape (batch_size, vocab_size)
        x = self.fc2(x)
        return x, w

    def build(self, input_shape):
        self.embedding = tf.keras.layers.Embedding(self.vocab_size, self.embedding_dim)
        self.attention = Bahdanau_Attention(self.embedding_dim)
        self.fc1 = tf.keras.layers.Dense(self.hidden_size)
        self.fc2 = tf.keras.layers.Dense(self.vocab_size)
        self.srelu = Smooth_ReLU(alpha_initializer = 'lecun_normal', alpha_constraint = tf.keras.constraints.max_norm(max_value=0.5, axis=[]))
        self.built = True

    def get_config(self):
        config = {
            'embedding_dim': self.embedding_dim,
            'max_length': self.max_length,
            'hidden_size': self.hidden_size,
            'vocab_size': self.vocab_size,
        }
        base_config = super(Simple_Decoder, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        """Return the logits shape: [batch_size, vocab_size].

        `input_shape` is the list ``[features_shape, position_shape]``; the
        batch size is the first entry of the features shape.
        """
        features_shape = input_shape[0]
        return [features_shape[0], self.vocab_size]

In [19]:
class Dense_Decoder(tf.keras.layers.Layer):
    """Classifier head: max-pooling followed by four dense layers.

    Optionally conditions on the two duplicate vectors, which are
    concatenated to the flattened features before the second dense layer.

    Args:
        max_length: number of output classes; hidden widths are multiples of it.
    """

    def __init__(self, max_length, **kwargs):
        super(Dense_Decoder, self).__init__(**kwargs)
        self.max_length = max_length

    def call(self, inputs):
        """Compute class logits.

        Args:
            inputs: ``[features, d_t, d_c]``. `d_t` and `d_c` may both be
                None, in which case only the features are used.

        Returns:
            Logits of shape (batch_size, max_length).
        """
        x, d_t, d_c = inputs
        # 2x2 pooling halves both spatial axes of the feature map
        x = self.pool(x)
        x = self.fc1(x)
        x = self.srelu1(x)
        # flatten everything but the batch axis
        x = tf.reshape(x, [tf.shape(x)[0], tf.math.reduce_prod(x.shape[1:])])
        # identity test, not `!=`: comparing a tensor/array with None via an
        # operator depends on the operand's overloaded equality
        if d_t is not None and d_c is not None:
            d = tf.concat([tf.cast(d_t, 'float32'), tf.cast(d_c, 'float32')], axis=-1)
            x = tf.concat([d, x], axis=-1)
        x = self.fc2(x)
        x = self.srelu2(x)
        x = self.fc3(x)
        x = self.srelu3(x)
        x = self.fc4(x)
        # shape = (batch_size, max_length)
        return x

    def build(self, input_shape):
        self.pool = tf.keras.layers.MaxPool2D((2, 2))
        self.fc1 = tf.keras.layers.Dense(self.max_length * 16)
        self.fc2 = tf.keras.layers.Dense(self.max_length * 16)
        self.fc3 = tf.keras.layers.Dense(self.max_length * 4)
        self.fc4 = tf.keras.layers.Dense(self.max_length)
        self.srelu1 = Smooth_ReLU(alpha_initializer = 'lecun_normal', alpha_constraint = tf.keras.constraints.max_norm(max_value=0.5, axis=[]), shared_axes = [1, 2])
        self.srelu2 = Smooth_ReLU(alpha_initializer = 'lecun_normal', alpha_constraint = tf.keras.constraints.max_norm(max_value=0.5, axis=[]))
        self.srelu3 = Smooth_ReLU(alpha_initializer = 'lecun_normal', alpha_constraint = tf.keras.constraints.max_norm(max_value=0.5, axis=[]))
        self.built = True

    def get_config(self):
        config = {
            'max_length': self.max_length,
        }
        base_config = super(Dense_Decoder, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        """Return the logits shape: [batch_size, max_length].

        `input_shape` is the list of shapes matching ``[features, d_t, d_c]``;
        the batch size is the first entry of the features shape.
        """
        features_shape = input_shape[0]
        return [features_shape[0], self.max_length]

In [20]:
class RNN_Decoder(tf.keras.layers.Layer):
    """Attention-based recurrent decoder that predicts one token per call.

    Sub-layers (created in `build`): a token embedding, a Bahdanau attention
    block, three GRU layers and two dense layers separated by a Smooth_ReLU.
    """

    def __init__(self, embedding_dim, hidden_size, vocab_size, max_length, **kwargs):
        """Store the hyper-parameters; the sub-layers are created in `build`."""
        super(RNN_Decoder, self).__init__(**kwargs)
        self.hidden_size = hidden_size
        self.embedding_dim = embedding_dim
        self.vocab_size = vocab_size
        self.max_length = max_length

    def call(self, inputs, training=True, teacher_forcing=False):
        """Run one decoding step.

        Args:
            inputs: ``[x, l, d_t, d_c, features, hidden]`` - the previous
                token, the length vector, the two duplicate vectors, the
                encoder features and the list of three GRU states.
            training: enables the dropouts and is forwarded to the GRUs.
            teacher_forcing: if True, `x` holds token ids and is looked up in
                the embedding; otherwise `x` is multiplied with the embedding
                matrix (presumably a distribution over the vocabulary -
                confirm against callers).

        Returns:
            ``(logits, [state1, state2, state3], attention_weights)``.
        """
        x, l, d_t, d_c, features, hidden = inputs
        # the attention query is the first GRU state concatenated with the length vector
        # attention itself is implemented as a separate layer
        l = tf.cast(l, 'float32')
        hidden_0_with_length = tf.concat([l, hidden[0]], axis=-1)
        context_vector, attention_weights = self.attention([features, hidden_0_with_length])
        # add a time axis of size 1 so l and d can be concatenated to GRU inputs
        l = tf.expand_dims(l, 1)
        d = tf.expand_dims(tf.concat([tf.cast(d_t, 'float32'), tf.cast(d_c, 'float32')], axis=-1), 1)

        # x shape before is (batch_size, 1) since it is passed through one by one at a time
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        if teacher_forcing:
            x = self.embedding(x)
        else:
            # the embedding weights must exist before they can be used directly;
            # this call only serves to build the layer
            if not self.embedding.built:
                self.embedding(x)
            # contract the last axis of x with the embedding matrix
            x = tf.tensordot(x, self.embedding.weights[0], axes=[-1,0])
        # context_vector shape is (batch_size, channels of features)
        # x shape after concatenation == (batch_size, 1, channels + embedding_dim)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        # passing the concatenated vector to the GRU
        # x shape is (batch_size, 1, hidden_size)
        # state is new hidden used in next step
        if training:
            x = tf.nn.dropout(x, rate=0.3)
        x, state1 = self.gru1(x, initial_state = hidden[0], training=training)
        x_identity = tf.identity(x)
        # second GRU additionally sees the duplicate and length vectors
        x = tf.concat([d, l, x], axis=-1)
        x, state2 = self.gru2(x, initial_state = hidden[1], training=training)
        x_identity2 = tf.identity(x)
        
        # skip connection from the first GRU output
        x = x + x_identity
        if training:
            x = tf.nn.dropout(x, rate=0.3)
        x, state3 = self.gru3(x, initial_state = hidden[2], training=training)
        # skip connection from the second GRU output, then append d and l again
        x = tf.concat([d, l, x + x_identity2], axis=-1)
        # drop the time axis by flattening everything but the batch axis
        x = tf.reshape(x, [tf.shape(x)[0], tf.math.reduce_prod(x.shape[1:])])
        # x shape (batch_size, hidden_size)
        x = self.fc1(x)
        x = self.srelu(x)
        # x shape (batch_size, vocab_size)
        x = self.fc2(x)

        return x, [state1, state2, state3], attention_weights

    def reset_state(self, batch_size):
        """Return three zero states of shape (batch_size, hidden_size), one per GRU."""
        # generate new hidden layer with different batch size
        return [tf.zeros([batch_size, self.hidden_size]) for _ in range(3)]
    
    def build(self, input_shape):
        """Create the sub-layers."""
        self.embedding = tf.keras.layers.Embedding(self.vocab_size, self.embedding_dim)
        self.gru1 = tf.keras.layers.GRU(self.hidden_size, return_sequences=True,
                                        return_state=True, recurrent_initializer='glorot_uniform')
        self.gru2 = tf.keras.layers.GRU(self.hidden_size, return_sequences=True,
                                        return_state=True, recurrent_initializer='glorot_uniform')
        self.gru3 = tf.keras.layers.GRU(self.hidden_size, return_sequences=True,
                                        return_state=True, recurrent_initializer='glorot_uniform')
        self.fc1 = tf.keras.layers.Dense(self.hidden_size)
        self.fc2 = tf.keras.layers.Dense(self.vocab_size)
        self.srelu = Smooth_ReLU(alpha_initializer = 'lecun_normal', alpha_constraint = tf.keras.constraints.max_norm(max_value=0.5, axis=[]))
        self.attention = Bahdanau_Attention(self.hidden_size)
        self.built = True
    
    def get_config(self):
        """Return the constructor arguments merged into the base layer config."""
        config = {
            'hidden_size': self.hidden_size,
            'embedding_dim': self.embedding_dim,
            'vocab_size': self.vocab_size,
            'max_length': self.max_length,
        }
        base_config = super(RNN_Decoder, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
    
    def compute_output_shape(self, input_shape):
        # NOTE(review): `call` takes a six-element list, so `input_shape[0]` is
        # presumably the shape of the token input rather than the batch size -
        # confirm before relying on this method.
        return [input_shape[0], self.vocab_size]

In [21]:
class Cangjie(tf.keras.Model):
    """Inference model that turns glyph bitmaps into code token sequences.

    Combines an encoder, a sequence decoder, a length head and a
    duplicate-count head. Every public method is a `tf.function` with a fixed
    input signature that depends on the module-level constants EBD_DIM,
    MAX_DUP, MAX_LEN and VOCAB.
    """

    def __init__(self, encoder, decoder, length, duplicate, **kwargs):
        """Store the four sub-models.

        Args:
            encoder: maps glyph bitmaps to a feature map.
            decoder: step-wise sequence decoder exposing `reset_state`.
            length: head called with ``[features, total_dups, curr_dups]``.
            duplicate: head called with ``[features, None, None]``.
        """
        super(Cangjie, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.length = length
        self.duplicate = duplicate
    
    @tf.function(input_signature=[tf.TensorSpec(shape=[None, 64, 64, 1], dtype=tf.float32)])
    def encode(self, glyph):
        """Encode a batch of 64x64 single-channel glyphs in inference mode."""
        return self.encoder(glyph, training=False)

    @tf.function(input_signature=[tf.TensorSpec(shape=[None, 16, 16, EBD_DIM], dtype=tf.float32)])
    def predict_duplicates(self, features):
        """Return softmax probabilities from the duplicate-count head."""
        return tf.nn.softmax(self.duplicate([features, None, None]), axis=-1)
    
    @tf.function(input_signature=[tf.TensorSpec(shape=[None, 16, 16, EBD_DIM], dtype=tf.float32),
                                 tf.TensorSpec(shape=[None, MAX_DUP], dtype=tf.float32),
                                 tf.TensorSpec(shape=[None, MAX_DUP], dtype=tf.int64)])
    def predict_length(self, features, total_dups, curr_dups):
        """Return softmax probabilities from the length head."""
        return tf.nn.softmax(self.length([features, total_dups, curr_dups]), axis=-1)
    
    @tf.function(input_signature=[tf.TensorSpec(shape=[None, 16, 16, EBD_DIM], dtype=tf.float32),
                                  tf.TensorSpec(shape=[None, MAX_LEN], dtype=tf.float32),
                                  tf.TensorSpec(shape=[None, MAX_DUP], dtype=tf.float32),
                                  tf.TensorSpec(shape=[None, MAX_DUP], dtype=tf.int64)])
    def decode(self, features, length, total_dups, curr_dups):
        """Greedily decode MAX_LEN steps without teacher forcing.

        Returns:
            ``(tokens, probability)``: the argmax token ids of shape
            (batch, MAX_LEN + 1), whose first column is the start token, and
            the product of the per-step maximum probabilities.
        """
        # start with 0: a one-hot vector for token 0, repeated for every sample
        dec_input = tf.repeat(tf.constant([[[1] + [0] * (VOCAB - 1)]], dtype='float32'), tf.shape(features)[0], axis=0)
        hidden = self.decoder.reset_state(batch_size=tf.shape(features)[0])
        probability = tf.ones([tf.shape(features)[0]], dtype='float32')
        # iterate predictions, no teacher forcing here
        for i in range(MAX_LEN):
            prediction, hidden, attention_weights = self.decoder([tf.expand_dims(dec_input[:, i, :], 1), length, total_dups, curr_dups, features, hidden], training=False, teacher_forcing=False)
            # we need deterministic result
            prediction = tf.math.softmax(prediction, axis=-1)
            probability *= tf.math.reduce_max(prediction, axis=-1)
            # the full distribution (not a one-hot) is fed back as the next input
            dec_input = tf.concat([dec_input, tf.expand_dims(prediction, 1)], axis=1)
        return tf.math.argmax(dec_input, axis=-1), probability   

    @tf.function(input_signature=[tf.TensorSpec(shape=[None, 64, 64, 1], dtype=tf.float32)])
    def call(self, glyph):
        """Predict the codes of every glyph in the batch.

        Returns:
            ``(results, probs, dups_dict)``: token ids of shape
            (batch, max_dup, MAX_LEN + 1), the matching sequence
            probabilities of shape (batch, max_dup), and the argmax of the
            duplicate-count prediction per glyph. ``max_dup`` is the largest
            ``dups_dict`` value in the batch plus one.
        """
        features = self.encode(glyph)
        total_dups = self.predict_duplicates(features)
        dups_dict = tf.math.argmax(total_dups, axis=-1)
        max_dup = tf.math.reduce_max(dups_dict) + 1

        # output buffers, filled one duplicate slot per loop iteration
        results = tf.zeros([tf.shape(glyph, out_type=tf.int64)[0], max_dup, MAX_LEN + 1], dtype='int64')
        probs = tf.zeros([tf.shape(glyph, out_type=tf.int64)[0], max_dup], dtype='float32')
        # rows of the identity matrix serve as one-hot vectors
        identity_matrix = tf.convert_to_tensor(np.identity(MAX_DUP), dtype='int64')

        for i in range(max_dup):
            # clamp the slot index to each glyph's own predicted duplicate index
            curr_dups = tf.math.minimum(tf.math.argmax(total_dups, axis=-1), i)
            curr_dups = tf.nn.embedding_lookup(identity_matrix, curr_dups)
            length = self.predict_length(features, total_dups, curr_dups)
            test_result, prob = self.decode(features, length, total_dups, curr_dups)
            # rebuild the buffers: slots before i, the new slot i, zeros for the rest
            results = tf.concat([results[:, :i, :], tf.expand_dims(test_result, axis=1), tf.zeros([tf.shape(glyph, out_type=tf.int64)[0], max_dup - i - 1, MAX_LEN + 1], dtype='int64')], axis=1)
            probs = tf.concat([probs[:, :i], tf.expand_dims(prob, axis=1), tf.zeros([tf.shape(glyph, out_type=tf.int64)[0], max_dup - i - 1], dtype='float32')], axis=1)
        return results, probs, dups_dict

## Graph

In [22]:
with strategy.scope():
    # One independent Adam optimizer per separately trained part, created in
    # this order: step 1, step 2, length head, duplicate head.
    (optimizer_step1,
     optimizer_step2,
     optimizer_length,
     optimizer_dups) = (tf.keras.optimizers.Adam() for _ in range(4))

### Step 1

In [23]:
def loss_function(real, pred):
    """Per-sample sparse softmax cross-entropy of logits `pred` against integer labels `real`."""
    per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=real)
    return per_sample

def accuracy_function(real, pred):
    """Return 1.0 where the argmax of `pred` over the last axis equals `real`, else 0.0."""
    predicted_ids = tf.math.argmax(pred, axis=-1)
    hits = predicted_ids == real
    return tf.cast(hits, tf.float32)

In [24]:
@tf.function
def train_step1(glyph, target, length, total_dups, curr_dups):
    """Run one distributed step-1 training step on a global batch.

    Updates the encoder and `simple_decoder` on the token loss, then the
    length head and the duplicate head on their own losses. `encoder`,
    `simple_decoder`, `length_decoder` and `dup_decoder` are module-level
    models defined elsewhere in the notebook.

    Returns:
        ``(loss, accuracy)``: the per-sample token loss and accuracy (each
        averaged over the target positions), summed over the global batch.
    """
    # distributed run function kernel
    @tf.function
    def dist_step(glyph, target, length, total_dups, curr_dups):
        # per-sample accumulators over the target positions
        sample_loss = tf.zeros([glyph.shape[0]]); sample_accuracy = tf.zeros([glyph.shape[0]])
        with tf.GradientTape() as tape:
            features = encoder(glyph)
            # column 0 of target is the start token, so prediction starts at column 1
            for i in range(1, target.shape[1]):
                position = tf.constant(i-1, dtype='int64', shape=[target.shape[0]])
                prediction, weight = simple_decoder([features, position])
                sample_loss += loss_function(target[:, i], prediction)
                sample_accuracy += accuracy_function(target[:, i], prediction)
            # scale by the global batch size, not the per-replica one
            loss = tf.reduce_sum(sample_loss) / BATCH_SIZE

        trainable_variables = simple_decoder.trainable_variables + encoder.trainable_variables
        gradients = tape.gradient(loss, trainable_variables)
        optimizer_step1.apply_gradients(zip(gradients, trainable_variables))

        # the heads reuse `features` computed above; gradients are taken with
        # respect to the head's own variables only, so the encoder is not
        # updated by these losses
        with tf.GradientTape() as tape_length:
            length_pred = length_decoder([features, total_dups, curr_dups])
            # length is one-hot; argmax recovers the class index
            loss_length = tf.reduce_sum(loss_function(tf.math.argmax(length, axis=-1), length_pred)) / BATCH_SIZE

        gradients_length = tape_length.gradient(loss_length, length_decoder.trainable_variables)
        optimizer_length.apply_gradients(zip(gradients_length, length_decoder.trainable_variables))

        with tf.GradientTape() as tape_dups:
            dups_pred = dup_decoder([features, None, None])
            loss_dups = tf.reduce_sum(loss_function(tf.math.argmax(total_dups, axis=-1), dups_pred)) / BATCH_SIZE

        gradients_dups = tape_dups.gradient(loss_dups, dup_decoder.trainable_variables)
        optimizer_dups.apply_gradients(zip(gradients_dups, dup_decoder.trainable_variables))

        # average over the number of predicted positions
        return sample_loss / (target.shape[1] - 1), sample_accuracy / (target.shape[1] - 1)

    sample_loss, sample_accuracy = strategy.run(dist_step, args=(glyph, target, length, total_dups, curr_dups))
    # sum over replicas and over the batch axis
    loss = strategy.reduce(tf.distribute.ReduceOp.SUM, sample_loss, axis=0)
    accuracy = strategy.reduce(tf.distribute.ReduceOp.SUM, sample_accuracy, axis=0)
    return loss, accuracy

In [25]:
@tf.function
def validation_step1(glyph, target):
    """Evaluate the step-1 model on one global batch without updating weights.

    Returns:
        ``(loss, accuracy)``: the per-sample token loss and accuracy (each
        averaged over the target positions), summed over the global batch.
    """
    @tf.function
    def dist_step(glyph, target):
        batch = glyph.shape[0]
        # column 0 of target is skipped, so one step per remaining column
        steps = target.shape[1] - 1
        sample_loss = tf.zeros([batch])
        sample_accuracy = tf.zeros([batch])
        feature = encoder(glyph, training=False)
        for step in range(steps):
            position = tf.constant(step, dtype='int64', shape=[target.shape[0]])
            prediction, _ = simple_decoder([feature, position])
            labels = target[:, step + 1]
            sample_loss = sample_loss + loss_function(labels, prediction)
            sample_accuracy = sample_accuracy + accuracy_function(labels, prediction)
        return sample_loss / steps, sample_accuracy / steps

    per_replica_loss, per_replica_accuracy = strategy.run(dist_step, args=(glyph, target))
    # sum over replicas and over the batch axis
    loss = strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_loss, axis=0)
    accuracy = strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_accuracy, axis=0)
    return loss, accuracy

In [26]:
def step1(epoch):
    """Run one step-1 epoch: train, validate, save a checkpoint, print a summary.

    Args:
        epoch: zero-based epoch index (printed as ``epoch + 1``).
    """
    start = time.time()
    total_loss = total_accuracy = 0
    val_loss = val_accuracy = 0

    progress_line = 'Epoch {}, Train Loss {:.4f}, Accuracy {:.2%}; progress {:.1%}, taken {:.0f} sec'
    for batch, batch_data in enumerate(dataset, start=1):
        glyph_tensor, target, length, total_dups, curr_dups = batch_data
        batch_loss, batch_accuracy = train_step1(glyph_tensor, target, length, total_dups, curr_dups)
        total_loss += batch_loss
        total_accuracy += batch_accuracy
        # samples seen so far; the last batch may be smaller than BATCH_SIZE
        seen = min(num_samples, batch * BATCH_SIZE)
        print(progress_line.format(
            epoch + 1, total_loss / seen, total_accuracy / seen, seen / num_samples, time.time() - start), end='\r')

    for glyph_tensor, target, *_ in val_dataset:
        batch_loss, batch_accuracy = validation_step1(glyph_tensor, target)
        val_loss += batch_loss
        val_accuracy += batch_accuracy

    # save a checkpoint at the end of the epoch
    with strategy.scope():
        ckpt_manager_step1.save()

    summary_line = 'Epoch {}, Train Loss {:.4f}, Accuracy {:.2%} | Validation Loss {:.4f}, Accuracy {:.2%}; taken {:.0f} sec'
    print(summary_line.format(
        epoch + 1, total_loss / num_samples, total_accuracy / num_samples,
        val_loss / num_samples_val, val_accuracy / num_samples_val, time.time() - start))

### Step 2

In [27]:
def predict(features, max_length, length, total_dups, curr_dups):
    """Greedily decode `max_length` steps with the global `decoder`, without teacher forcing.

    Returns:
        ``(tokens, probability)``: argmax token ids of shape
        (batch, max_length + 1), whose first column is the start token, and
        the product of the per-step maximum probabilities.
    """
    batch_size = features.shape[0]
    # every sequence starts with a one-hot vector for token 0
    start_token = [1] + [0] * (VOCAB - 1)
    dec_input = tf.constant([[start_token]] * batch_size, dtype='float32')
    hidden = decoder.reset_state(batch_size=batch_size)
    probability = tf.ones([batch_size], dtype='float32')
    for step in range(max_length):
        previous = tf.expand_dims(dec_input[:, step, :], 1)
        logits, hidden, _ = decoder(
            [previous, length, total_dups, curr_dups, features, hidden],
            training=False, teacher_forcing=False)
        # softmax keeps the result deterministic (no sampling)
        distribution = tf.math.softmax(logits, axis=-1)
        probability *= tf.math.reduce_max(distribution, axis=-1)
        # the full distribution is fed back as the next step's input
        dec_input = tf.concat([dec_input, tf.expand_dims(distribution, 1)], axis=1)
    return tf.math.argmax(dec_input, axis=-1), probability

In [28]:
def predict_next(features, target, length, total_dups, curr_dups, training=True, teacher_forcing=True):
    """Run the decoder for `target.shape[1] - 1` steps and collect its raw outputs.

    With teacher forcing, step `i` is fed column `i` of `target`. Without it, the
    decoder starts from a one-hot vector for index 0 and is then fed the softmax
    of its own previous output.

    Returns:
        The per-step decoder outputs, concatenated along axis 1.
    """
    batch_size = features.shape[0]
    num_steps = target.shape[1] - 1
    state = decoder.reset_state(batch_size=batch_size)
    # placeholder first step so there is something to concatenate onto; dropped on return
    collected = tf.zeros([batch_size, 1, VOCAB], dtype='float32')
    if not teacher_forcing:
        previous = tf.constant([[[1] + [0] * (VOCAB - 1)]] * batch_size, dtype='float32')
    for step in range(num_steps):
        if teacher_forcing:
            previous = tf.expand_dims(target[:, step], 1)
        logits, state, _ = decoder(
            [previous, length, total_dups, curr_dups, features, state],
            training=training, teacher_forcing=teacher_forcing)
        collected = tf.concat([collected, tf.expand_dims(logits, 1)], axis=1)
        if not teacher_forcing:
            previous = tf.expand_dims(tf.math.softmax(logits, axis=-1), 1)
    return collected[:, 1:, :]

In [29]:
def loss_function_step2(real, pred):
    """Per-sample sequence loss: sparse softmax cross-entropy of `pred` against `real`, summed over axis 1."""
    per_step = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=real)
    return tf.reduce_sum(per_step, axis=1)
    
def accuracy_function_step2(real, pred):
    """Return 1.0 for every row in which `pred` equals `real` at all positions, else 0.0."""
    return tf.cast(tf.math.reduce_all(pred == real, axis=1), tf.float32)

In [30]:
@tf.function
def train_step2(glyph_tensor, target, length, total_dups, curr_dups, teacher_forcing=True):
    """Run one distributed stage-2 training step and return metrics summed over the batch.

    Args:
        glyph_tensor: encoder input batch.
        target: target sequences; column 0 is only ever used as decoder input, columns 1.. are the labels.
        length, total_dups: auxiliary targets; their argmax over the last axis is used as the label.
        curr_dups: extra input forwarded unchanged to the length decoder and the sequence decoder.
        teacher_forcing: when True the length decoder and sequence decoder are fed the ground-truth
            `total_dups`/`length` and target tokens; when False they are fed the softmaxed
            predictions of the auxiliary decoders and the decoder's own previous output.

    Returns:
        `(loss, accuracy, accuracy_length, accuracy_dups)`, each reduced with SUM across
        replicas and along axis 0.
    """
    @tf.function
    def dist_step(glyph_tensor, target, length, total_dups, curr_dups, teacher_forcing):
        """Per-replica step: forward pass, three separate optimizer updates, then metrics."""
        # Outer tape: sequence loss w.r.t. decoder + encoder variables.
        # The two inner tapes record the auxiliary heads, each updated from its own loss only.
        with tf.GradientTape() as tape:
            # NOTE(review): no explicit `training` flag is passed to the encoder here, while
            # validation_step2 passes training=False — confirm the encoder's default is the intended mode.
            features = encoder(glyph_tensor)

            with tf.GradientTape() as tape_dups:
                dups_pred = dup_decoder([features, None, None])
                # summed loss divided by the global BATCH_SIZE (per-replica size times replica count)
                loss_dups = tf.reduce_sum(loss_function(tf.math.argmax(total_dups, axis=-1), dups_pred)) / BATCH_SIZE
            # from here on dups_pred holds softmax probabilities rather than the raw decoder output
            dups_pred = tf.nn.softmax(dups_pred, axis=-1)

            with tf.GradientTape() as tape_length:
                if teacher_forcing:
                    length_pred = length_decoder([features, total_dups, curr_dups])
                else:
                    length_pred = length_decoder([features, dups_pred, curr_dups])
                loss_length = tf.reduce_sum(loss_function(tf.math.argmax(length, axis=-1), length_pred)) / BATCH_SIZE
            # likewise converted to softmax probabilities for downstream use
            length_pred = tf.nn.softmax(length_pred, axis=-1)

            if teacher_forcing:
                predictions = predict_next(features, target, length, total_dups, curr_dups, teacher_forcing=teacher_forcing)
            else:
                predictions = predict_next(features, target, length_pred, dups_pred, curr_dups, teacher_forcing=teacher_forcing)
            # labels are the target shifted by one position
            sample_loss = loss_function_step2(target[:, 1:], predictions)
            loss = tf.reduce_sum(sample_loss) / BATCH_SIZE

        # the sequence loss is the only one that updates the encoder
        trainable_variables = decoder.trainable_variables + encoder.trainable_variables
        gradients = tape.gradient(loss, trainable_variables)
        optimizer_step2.apply_gradients(zip(gradients, trainable_variables))

        gradients_length = tape_length.gradient(loss_length, length_decoder.trainable_variables)
        optimizer_length.apply_gradients(zip(gradients_length, length_decoder.trainable_variables))

        gradients_dups = tape_dups.gradient(loss_dups, dup_decoder.trainable_variables)
        optimizer_dups.apply_gradients(zip(gradients_dups, dup_decoder.trainable_variables))

        # calculate accuracy based on the code's whole string
        # (always decoded free-running from the predicted length/dups, regardless of teacher_forcing)
        predictions_id, _ = predict(features, MAX_LEN, length_pred, dups_pred, curr_dups)
        sample_accuracy = accuracy_function_step2(predictions_id, target)
        sample_accuracy_length = accuracy_function(tf.math.argmax(length, axis=-1), length_pred)
        sample_accuracy_dups = accuracy_function(tf.math.argmax(total_dups, axis=-1), dups_pred)

        # the reported per-sample loss is averaged over the number of predicted steps
        return sample_loss / (target.shape[1] - 1), sample_accuracy, sample_accuracy_length, sample_accuracy_dups
    
    sample_loss, sample_accuracy, sample_accuracy_length, sample_accuracy_dups = strategy.run(dist_step, args=(glyph_tensor, target, length, total_dups, curr_dups, teacher_forcing))
    # SUM (not mean): the caller divides the running totals by the number of samples seen
    loss = strategy.reduce(tf.distribute.ReduceOp.SUM, sample_loss, axis=0)
    accuracy = strategy.reduce(tf.distribute.ReduceOp.SUM, sample_accuracy, axis=0)
    accuracy_length = strategy.reduce(tf.distribute.ReduceOp.SUM, sample_accuracy_length, axis=0)
    accuracy_dups = strategy.reduce(tf.distribute.ReduceOp.SUM, sample_accuracy_dups, axis=0)
    return loss, accuracy, accuracy_length, accuracy_dups

In [31]:
@tf.function
def validation_step2(glyph_tensor, target, length, total_dups, curr_dups, cal_percil=False):
    """Evaluate one batch without teacher forcing and return metrics summed over the batch.

    Returns:
        `(loss, accuracy, accuracy_length, accuracy_dups)`, each summed across replicas
        and along axis 0. When `cal_percil` is true, two more items are appended: the
        per-sample accuracy and the per-sample decoding probability, each concatenated
        over the local replica results.
    """
    @tf.function
    def replica_step(glyphs, labels, length_true, dups_true, dups_curr):
        features = encoder(glyphs, training=False)
        dup_probs = tf.nn.softmax(dup_decoder([features, None, None]), axis=-1)
        length_probs = tf.nn.softmax(length_decoder([features, dup_probs, dups_curr]), axis=-1)

        step_outputs = predict_next(
            features, labels, length_probs, dup_probs, dups_curr,
            training=False, teacher_forcing=False)
        seq_loss = loss_function_step2(labels[:, 1:], step_outputs)

        # whole-sequence accuracy comes from a separate free-running decode
        decoded_ids, confidence = predict(features, MAX_LEN, length_probs, dup_probs, dups_curr)
        seq_hits = accuracy_function_step2(decoded_ids, labels)
        length_hits = accuracy_function(tf.math.argmax(length_true, axis=-1), length_probs)
        dups_hits = accuracy_function(tf.math.argmax(dups_true, axis=-1), dup_probs)

        # loss is reported per predicted step
        return seq_loss / (labels.shape[1] - 1), seq_hits, length_hits, dups_hits, confidence

    def _replica_sum(per_replica):
        return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica, axis=0)

    sample_loss, sample_accuracy, sample_accuracy_length, sample_accuracy_dups, sample_prob = strategy.run(
        replica_step, args=(glyph_tensor, target, length, total_dups, curr_dups))
    loss = _replica_sum(sample_loss)
    accuracy = _replica_sum(sample_accuracy)
    accuracy_length = _replica_sum(sample_accuracy_length)
    accuracy_dups = _replica_sum(sample_accuracy_dups)
    summed = (loss, accuracy, accuracy_length, accuracy_dups)
    if cal_percil:
        all_accuracy = tf.concat(strategy.experimental_local_results(sample_accuracy), axis=0)
        all_prob = tf.concat(strategy.experimental_local_results(sample_prob), axis=0)
        return summed + (all_accuracy, all_prob)
    else:
        return summed

In [32]:
def step2(epoch, EPOCH):
    """Run one stage-2 epoch: train, validate, log scalars, save a checkpoint, print a summary.

    A random subset of the epoch's batches is trained without teacher forcing. Its
    share of all batches is `2.5 * epoch / EPOCH - 0.5`, clamped to the range [0, 1].
    """
    started_at = time.time()
    train_loss = train_acc = train_len_acc = train_dups_acc = 0
    valid_loss = valid_acc = valid_len_acc = valid_dups_acc = 0

    # batches per epoch, counting a trailing partial batch
    num_steps = num_samples // BATCH_SIZE + min(1, num_samples % BATCH_SIZE)
    free_share = max(min(2.5 * epoch / EPOCH - 0.5, 1.0), 0.0)
    counts = int(num_steps * free_share)
    # 1-based indices of the batches that run without teacher forcing
    free_running = set(np.random.choice(range(1, num_steps+1), counts, replace=False).tolist())

    for batch, (glyph_tensor, target, length, total_dups, curr_dups) in enumerate(dataset, start=1):
        step_metrics = train_step2(
            glyph_tensor, target, length, total_dups, curr_dups,
            teacher_forcing=batch not in free_running)
        t_loss, accuracy, accuracy_length, accuracy_dups = step_metrics
        train_loss += t_loss
        train_acc += accuracy
        train_len_acc += accuracy_length
        train_dups_acc += accuracy_dups

        seen = min(num_samples, batch * BATCH_SIZE)
        print('Epoch {}, Train Loss {:.4f}, Accuracy {:.2%}; Length Accuracy {:.2%}, Dups Accuracy {:.2%}; progress {:.1%}, taken {:.0f} sec'.format(
            epoch + 1, train_loss/seen, train_acc/seen, train_len_acc/seen, train_dups_acc/seen,
            seen/num_samples, time.time() - started_at), end='\r')

    for glyph_tensor, target, length, total_dups, curr_dups in val_dataset:
        step_metrics = validation_step2(glyph_tensor, target, length, total_dups, curr_dups)
        t_loss, accuracy, accuracy_length, accuracy_dups = step_metrics
        valid_loss += t_loss
        valid_acc += accuracy
        valid_len_acc += accuracy_length
        valid_dups_acc += accuracy_dups

    # record the epoch averages so they can be plotted later
    train_scalars = (
        ('loss', train_loss),
        ('accuracy', train_acc),
        ('length_accuracy', train_len_acc),
        ('duplication_accuracy', train_dups_acc),
    )
    valid_scalars = (
        ('loss', valid_loss),
        ('accuracy', valid_acc),
        ('length_accuracy', valid_len_acc),
        ('duplication_accuracy', valid_dups_acc),
    )
    with train_summary_writer.as_default():
        for tag, total in train_scalars:
            tf.summary.scalar(tag, total / num_samples, step=epoch)
    with test_summary_writer.as_default():
        for tag, total in valid_scalars:
            tf.summary.scalar(tag, total / num_samples_val, step=epoch)

    with strategy.scope():
        ckpt_manager_step2.save()

    print('Epoch {}, Train Loss {:.4f}, Accuracy {:.2%}; Length Accuracy {:.2%}, Dups Accuracy {:.2%} | Validation Loss {:.4f}, Accuracy {:.2%}; Length Accuracy {:.2%}, Dups Accuracy {:.2%}; taken {:.0f} sec'.format(
        epoch + 1,
        train_loss/num_samples, train_acc/num_samples, train_len_acc/num_samples, train_dups_acc/num_samples,
        valid_loss/num_samples_val, valid_acc/num_samples_val, valid_len_acc/num_samples_val, valid_dups_acc/num_samples_val,
        time.time() - started_at))

## Training

In [33]:
# Instantiate all sub-models under the distribution strategy's scope.
with strategy.scope():
    encoder = CNN_Encoder(embedding_dim=EBD_DIM)
    simple_decoder = Simple_Decoder(
        embedding_dim=EBD_DIM,
        max_length=MAX_LEN,
        hidden_size=UNIT_DIM,
        vocab_size=VOCAB,
    )
    length_decoder = Dense_Decoder(max_length=MAX_LEN)
    dup_decoder = Dense_Decoder(max_length=MAX_DUP)
    decoder = RNN_Decoder(
        embedding_dim=EBD_DIM,
        hidden_size=UNIT_DIM,
        max_length=MAX_LEN,
        vocab_size=VOCAB,
    )

In [34]:
# Stage-1 checkpoint: tracks the encoder, the simple decoder, both auxiliary decoders
# and the three optimizers. The manager keeps at most 5 checkpoints.
checkpoint_path_step1 = './checkpoints/train_step1'
ckpt_step1 = tf.train.Checkpoint(
    encoder=encoder,
    decoder=simple_decoder,
    length_decoder=length_decoder,
    dup_decoder=dup_decoder,
    optimizer=optimizer_step1,
    optimizer_length=optimizer_length,
    optimizer_dups=optimizer_dups,
)
ckpt_manager_step1 = tf.train.CheckpointManager(
    ckpt_step1, checkpoint_path_step1, max_to_keep=5)

In [35]:
# Stage-2 checkpoint: same layout as stage 1, but `decoder` is the RNN decoder and
# `optimizer` is the stage-2 optimizer. The manager keeps at most 5 checkpoints.
checkpoint_path_step2 = "./checkpoints/train_step2"
ckpt_step2 = tf.train.Checkpoint(
    encoder=encoder,
    decoder=decoder,
    length_decoder=length_decoder,
    dup_decoder=dup_decoder,
    optimizer=optimizer_step2,
    optimizer_length=optimizer_length,
    optimizer_dups=optimizer_dups,
)
ckpt_manager_step2 = tf.train.CheckpointManager(
    ckpt_step2, checkpoint_path_step2, max_to_keep=5)

In [36]:
# Summary writers for this run; the timestamp gives every run its own log directory.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
#graph_log_dir = 'logs/gradient_tape/' + current_time + '/func'
# step2 logs training scalars through the first writer and validation scalars through the second
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)
#graph_summary_writer = tf.summary.create_file_writer(graph_log_dir)

### Step 1

In [37]:
# Total number of stage-1 epochs to train.
EPOCHS_STEP1 = 20

# Resume from the latest stage-1 checkpoint if one exists.
epoch_step1 = 0
if ckpt_manager_step1.latest_checkpoint:
    # The number after the last '-' in the checkpoint path is taken as the count of epochs
    # already completed (step1 saves once per epoch) — assumes the manager numbers its saves sequentially.
    epoch_step1 = int(ckpt_manager_step1.latest_checkpoint.split('-')[-1])
    ckpt_step1.restore(ckpt_manager_step1.latest_checkpoint)

In [38]:
# Train the remaining stage-1 epochs. epoch_step1 is a notebook-level counter, so
# re-running this cell picks up at the epoch it had reached.
while epoch_step1 < EPOCHS_STEP1:
    step1(epoch_step1)
    epoch_step1 += 1

Instructions for updating:
Use `tf.data.Iterator.get_next_as_optional()` instead.
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
Epoch 1, Train Loss 1.8967, Accuracy 45.99% | Validation Loss 1.6580, Accuracy

### Step 2

In [39]:
# Total number of stage-2 epochs; step2 also uses it to schedule how many batches skip teacher forcing.
EPOCHS_STEP2 = 150

# Resume from the latest stage-2 checkpoint if one exists.
epoch_step2 = 0
if ckpt_manager_step2.latest_checkpoint:
    # The number after the last '-' in the checkpoint path is taken as the count of epochs
    # already completed (step2 saves once per epoch) — assumes the manager numbers its saves sequentially.
    epoch_step2 = int(ckpt_manager_step2.latest_checkpoint.split('-')[-1])
    ckpt_step2.restore(ckpt_manager_step2.latest_checkpoint)

In [40]:
# Train the remaining stage-2 epochs. epoch_step2 is a notebook-level counter, so
# re-running this cell picks up at the epoch it had reached.
while epoch_step2 < EPOCHS_STEP2:
    step2(epoch_step2, EPOCHS_STEP2)
    epoch_step2 += 1

Epoch 1, Train Loss 0.8488, Accuracy 41.84%; Length Accuracy 95.73%, Dups Accuracy 97.25% | Validation Loss 0.5742, Accuracy 57.45%; Length Accuracy 89.41%, Dups Accuracy 93.59%; taken 181 sec
Epoch 2, Train Loss 0.3559, Accuracy 66.66%; Length Accuracy 96.23%, Dups Accuracy 97.71% | Validation Loss 0.4370, Accuracy 67.46%; Length Accuracy 93.21%, Dups Accuracy 94.31%; taken 131 sec
Epoch 3, Train Loss 0.2867, Accuracy 72.93%; Length Accuracy 96.49%, Dups Accuracy 97.97% | Validation Loss 0.4897, Accuracy 68.40%; Length Accuracy 91.96%, Dups Accuracy 93.77%; taken 131 sec
Epoch 4, Train Loss 0.2483, Accuracy 76.54%; Length Accuracy 96.69%, Dups Accuracy 98.06% | Validation Loss 0.4956, Accuracy 68.10%; Length Accuracy 92.67%, Dups Accuracy 93.52%; taken 131 sec
Epoch 5, Train Loss 0.2215, Accuracy 78.91%; Length Accuracy 96.65%, Dups Accuracy 98.04% | Validation Loss 0.4803, Accuracy 72.15%; Length Accuracy 92.31%, Dups Accuracy 93.33%; taken 131 sec
Epoch 6, Train Loss 0.2024, Accurac

## Statistics

In [41]:
def percentiles(dataset):
    """Split per-sample decoding probabilities by whether the decoded sequence was correct.

    Args:
        dataset: iterable of `(glyph_tensor, target, length, total_dups, curr_dups)` batches.

    Returns:
        `(correct, wrong)`: the decoding probabilities of the samples whose whole
        predicted sequence matched the target, and of those that did not.
    """
    all_accuracies = []
    all_probs = []
    for (glyph_tensor, target, length, total_dups, curr_dups) in dataset:
        # cal_percil=True is required here: with the default (False) validation_step2 returns
        # only the four summed metrics, and unpacking six values would raise ValueError.
        _, _, _, _, all_accuracy, all_prob = validation_step2(
            glyph_tensor, target, length, total_dups, curr_dups, cal_percil=True)
        all_accuracies.append(all_accuracy)
        all_probs.append(all_prob)
    all_accuracies = tf.concat(all_accuracies, axis=0)
    all_probs = tf.concat(all_probs, axis=0)
    # per-sample accuracy is 0.0/1.0, so the cast yields a correctness mask
    correctness = tf.cast(all_accuracies, 'bool')
    correct = tf.boolean_mask(all_probs, correctness)
    wrong = tf.boolean_mask(all_probs, ~correctness)
    return correct, wrong

In [42]:
def print_val_percentile():
    """Tabulate percentiles of the decoding probability on the validation set.

    Returns:
        A DataFrame with one row for correctly decoded samples ('Correct') and one
        for the rest ('Wrong'); the columns are the percentile ranks.
    """
    target_percentiles = [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99]
    # percentiles() yields (correct, wrong), which is also the row order of the table
    rows = [np.percentile(scores.numpy(), target_percentiles)
            for scores in percentiles(val_dataset)]
    return pd.DataFrame(rows, columns=target_percentiles, index=['Correct', 'Wrong'])

In [43]:
# Display the validation-set probability percentiles for correct vs. wrong predictions.
print_val_percentile()

Unnamed: 0,1,2,5,10,20,30,40,50,60,70,80,90,95,98,99
Correct,0.543906,0.625736,0.81851,0.946189,0.98902,0.996495,0.998436,0.999179,0.99957,0.99978,0.9999,0.99996,0.999981,0.999991,0.999995
Wrong,0.170655,0.208686,0.287825,0.383096,0.505361,0.578287,0.667883,0.759542,0.846591,0.917368,0.965124,0.993017,0.997606,0.99949,0.999826


## Save Model

In [44]:
# Combine the encoder and the three decoders into a single Cangjie model, then build it
# for inputs of shape (None, 64, 64, 1).
cangjie = Cangjie(encoder, decoder, length_decoder, dup_decoder)
cangjie.build(input_shape = (None, 64, 64, 1))

In [45]:
# Export the assembled model to ./Cangjie_Model/.
cangjie.save('./Cangjie_Model/')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: ./Cangjie_Model/assets
