In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.layers import Input, LSTM, Dense,Attention ,Concatenate

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset files stored there can be read with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the pre-processed dataset arrays from files on the mounted Drive.
# NOTE(review): judging by how they are used further down, x presumably holds
# the stroke sequences, c the character-index sequences, and x_len / c_len the
# per-sample lengths -- confirm against the preprocessing script.
x_len=np.load('/content/drive/MyDrive/writemate data/x_len.npy')
x=np.load('/content/drive/MyDrive/writemate data/x.npy')
c_len=np.load('/content/drive/MyDrive/writemate data/c_len.npy')
c=np.load('/content/drive/MyDrive/writemate data/c.npy')

In [None]:
# Convert the character index sequences in c to one-hot vectors of depth 73.
# NOTE(review): 73 is presumably the alphabet size -- confirm against the
# preprocessing script.
c_hot=tf.one_hot(c,73)

In [None]:
# Build the next-step prediction pair: y drops the first timestep of x, and x
# is then cut to its first 1199 timesteps.
# NOTE(review): this rebinds x, so the cell is order-sensitive -- running it a
# second time derives y from the already-truncated x. The 1199 presumably
# assumes the loaded x has 1200 timesteps -- TODO confirm.
y=x[:,1:,:]
x=x[:,:1199,:]

In [None]:
# Sanity check: y is x shifted one step ahead, so x[n][t + 1] and y[n][t]
# should print the same values.
print(x[100][121])
print(y[100][120])

[0.30109042 0.43289304 0.        ]
[0.30109042 0.43289304 0.        ]


In [None]:
# separating the mdn parameters
def parse_parameters(z, eps=1e-8, sigma_eps=1e-4, output_mixture_components=20):
    """Split raw network outputs into constrained mixture-density parameters.

    Args:
        z: Tensor whose last axis has size ``6 * output_mixture_components + 1``,
            laid out as [pi logits | log sigmas (2 per component) | raw rhos |
            mus (2 per component) | raw end-of-stroke value].
        eps: Margin keeping the correlations away from +/-1 and the
            end-of-stroke probability away from 0 and 1.
        sigma_eps: Lower bound applied to the standard deviations.
        output_mixture_components: Number of mixture components. Defaults to
            20, the value that used to be hard-coded.

    Returns:
        Tuple ``(pis, mus, sigmas, rhos, es)``: softmax-normalised mixture
        weights, unconstrained means, positive standard deviations,
        correlations in (-1, 1) and an end-of-stroke probability in (0, 1).
    """
    k = output_mixture_components
    pis, sigmas, rhos, mus, es = tf.split(z, [k, 2 * k, k, 2 * k, 1], axis=-1)

    # In float32, 1.0 - 1e-8 rounds back to exactly 1.0, which would turn the
    # upper clips below into no-ops and let sqrt(1 - rho^2) or log(1 - e) hit
    # zero downstream. Never use a margin smaller than the dtype's epsilon.
    margin = max(eps, float(np.finfo(rhos.dtype.as_numpy_dtype).eps))

    pis = tf.keras.activations.softmax(pis, axis=-1)
    sigmas = tf.maximum(tf.math.exp(sigmas), sigma_eps)
    rhos = tf.clip_by_value(tf.keras.activations.tanh(rhos), margin - 1.0, 1.0 - margin)
    es = tf.clip_by_value(tf.keras.activations.sigmoid(es), eps, 1.0 - margin)
    return pis, mus, sigmas, rhos, es

In [None]:
#defining and carrying out loss function
def loss(y, lengths, pis, mus, sigmas, rho, es, eps=1e-8):
    """Per-sequence negative log-likelihood of the mixture-density output.

    Args:
        y: Targets with three channels on axis 2; the first two are scored by
            the bivariate Gaussian mixture, the third (compared against 1) by
            the Bernoulli term.
        lengths: Number of valid timesteps per sequence; later steps are
            excluded from the loss.
        pis: Mixture weights, one per component on axis 2.
        mus: Component means, two per component on axis 2.
        sigmas: Component standard deviations, two per component on axis 2.
        rho: Component correlations, one per component on axis 2.
        es: Probability that the third channel equals 1, size 1 on axis 2.
        eps: Lower bound on the mixture likelihood before taking the log.

    Returns:
        Tensor with one mean negative log-likelihood per sequence, averaged
        over the timesteps that are inside ``lengths`` and not NaN.
    """
    # Targets loaded from disk may not share the network's dtype.
    y = tf.cast(y, pis.dtype)

    sigma_1, sigma_2 = tf.split(sigmas, 2, axis=2)
    y_1, y_2, y_3 = tf.split(y, 3, axis=2)
    mu_1, mu_2 = tf.split(mus, 2, axis=2)

    one_minus_rho_sq = 1.0 - tf.square(rho)
    d_1 = (y_1 - mu_1) / sigma_1
    d_2 = (y_2 - mu_2) / sigma_2

    norm = 1.0 / (2 * np.pi * sigma_1 * sigma_2 * tf.sqrt(one_minus_rho_sq))
    Z = tf.square(d_1) + tf.square(d_2) - 2 * rho * d_1 * d_2

    gaussian_likelihoods = tf.exp(-Z / (2 * one_minus_rho_sq)) * norm
    gmm_likelihood = tf.reduce_sum(pis * gaussian_likelihoods, 2)
    gmm_likelihood = tf.maximum(gmm_likelihood, eps)

    # Squeeze only the channel axis: an axis-less squeeze would also drop a
    # batch or time axis of size 1 and then broadcast wrongly against the
    # mixture term.
    bernoulli_likelihood = tf.squeeze(
        tf.where(tf.equal(tf.ones_like(y_3), y_3), es, 1 - es), axis=2
    )

    nll = -(tf.math.log(gmm_likelihood) + tf.math.log(bernoulli_likelihood))
    sequence_mask = tf.logical_and(
        tf.sequence_mask(lengths, maxlen=tf.shape(y)[1]),
        tf.logical_not(tf.math.is_nan(nll)),
    )
    nll = tf.where(sequence_mask, nll, tf.zeros_like(nll))
    num_valid = tf.reduce_sum(tf.cast(sequence_mask, nll.dtype), axis=1)

    # Guard against sequences with no valid step.
    sequence_loss = tf.reduce_sum(nll, axis=1) / tf.maximum(num_valid, 1.0)
    return sequence_loss

In [None]:
#LSTM and attention layer combined
class LSTM_Attention(tf.keras.Model):
    """First recurrent layer combined with a soft attention window over the text.

    At every timestep the stroke point is concatenated with the previous
    window vector and fed through one LSTM step; the hidden state then
    predicts the window parameters (a, b, k), from which the next window
    vector over the one-hot character sequence is computed.
    """

    def __init__(self, sent_max_len, batch_size,
                 hidden_size, win_in_size, win_out_size):
        """Create the recurrent cell and the window-parameter projection.

        Args:
            sent_max_len: Number of character positions in the one-hot text.
            batch_size: Unused; kept for interface compatibility.
            hidden_size: Number of units of the LSTM cell.
            win_in_size: Unused; kept for interface compatibility.
            win_out_size: Output size of the window projection. It is split
                into three equal parts (a, b, k), so it must be divisible by 3.
        """
        super(LSTM_Attention, self).__init__()
        self.sent_max_len = sent_max_len
        # A cell rather than a full LSTM layer: `call` advances one timestep at
        # a time with 2-D (batch, features) inputs, which an LSTM layer rejects.
        self.lstm1 = tf.keras.layers.LSTMCell(hidden_size)
        self.softwindow = tf.keras.layers.Dense(win_out_size)

    def call(self, strks, onehots, sents_m, w_prev, k_prev, prev):
        """Run the attention LSTM over a whole stroke sequence.

        Args:
            strks: Stroke inputs, expected as (batch, timesteps, features).
            onehots: One-hot text, expected as (batch, sent_max_len, alphabet).
            sents_m: Unused; kept for interface compatibility. Positions past
                the real sentence length are therefore not masked out.
            w_prev: Initial window vector, expected as (batch, alphabet).
            k_prev: Initial window positions, (batch, win_out_size // 3).
            prev: Initial LSTM state, either an ``(h, c)`` pair or a single
                tensor that is used for both h and c.

        Returns:
            Tuple ``(hid1, prev, win_vec, w_prev, k_prev, phi)``: hidden states
            for every step (batch, timesteps, hidden), the final ``[h, c]``
            state, the window vectors for every step (batch, timesteps,
            alphabet), the last window vector, the last window positions and
            the last attention weights (batch, sent_max_len + 1).

        Raises:
            ValueError: If the number of timesteps is unknown or zero.
        """
        timesteps = strks.shape[1]
        if not timesteps:
            raise ValueError("strks must have a known, non-zero number of timesteps")

        strks = tf.cast(strks, tf.float32)
        onehots = tf.cast(onehots, tf.float32)
        if not isinstance(prev, (tuple, list)):
            prev = (prev, prev)

        # Character positions 0..sent_max_len, shaped (1, 1, U + 1) so they
        # broadcast against (batch, K, 1). Loop-invariant, so built once.
        u = tf.range(self.sent_max_len + 1, dtype=tf.float32)[tf.newaxis, tf.newaxis, :]

        h1_list, wt_list = [], []
        phi = None
        for t in range(timesteps):
            # Concatenate the stroke features with the previous window vector.
            input_t = tf.concat([strks[:, t, :], w_prev], axis=1)

            # One LSTM step: the cell returns (output, [h, c]).
            h1_t, prev = self.lstm1(input_t, prev)
            h1_list.append(h1_t)

            # Window parameters; exp keeps a and b positive and makes k
            # monotonically increasing over time.
            a, b, k = tf.split(self.softwindow(h1_t), num_or_size_splits=3, axis=1)
            a, b, k = tf.exp(a), tf.exp(b), k_prev + tf.exp(k)

            # Attention weight of every character position: (batch, U + 1).
            pos = tf.expand_dims(k, axis=2) - u
            gravity = -tf.expand_dims(b, axis=2) * tf.square(pos)
            phi = tf.reduce_sum(tf.expand_dims(a, axis=2) * tf.exp(gravity), axis=1)

            # Drop the extra trailing position, then take the weighted sum of
            # the one-hot characters: (batch, U, 1) * (batch, U, alphabet).
            except_last = tf.expand_dims(phi[:, :self.sent_max_len], axis=2)
            w_t = tf.reduce_sum(except_last * onehots, axis=1)
            wt_list.append(w_t)

            # Carry the window state into the next timestep.
            k_prev = k
            w_prev = w_t

        hid1 = tf.stack(h1_list, axis=1)    # (batch, timesteps, hidden)
        win_vec = tf.stack(wt_list, axis=1)  # (batch, timesteps, alphabet)
        return hid1, prev, win_vec, w_prev, k_prev, phi

`w_prev`, `k_prev`, and `prev` should be initialised with zeros of shape (batch_size, hidden_dim).

In [None]:
#The handwriting synthesis model
class HandwritingSynthesis(tf.keras.Model):
    """Three-layer LSTM handwriting-synthesis network with a mixture-density head.

    The first layer is the attention LSTM; layers two and three receive skip
    connections from the input strokes and the window vectors. The outputs of
    all three layers feed a linear layer whose activations are turned into
    mixture-density parameters by ``parse_parameters`` and scored by ``loss``.
    """

    def __init__(self, sent_max_len, batch_size, hidden_size=400):
        """Build the layers.

        Args:
            sent_max_len: Number of character positions in the one-hot text.
            batch_size: Stored on the instance and forwarded to the first layer.
            hidden_size: Number of units in each LSTM layer.
        """
        super(HandwritingSynthesis, self).__init__()
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.num_layers = 3
        win_in_size = hidden_size
        win_out_size = 10 * 3  # three window parameters (a, b, k) per window component
        mdn_out_size = 1 + ((1 + 1 + 2 + 2) * 20)  # 20 mixture components + end-of-stroke

        self.lstm1 = LSTM_Attention(sent_max_len, batch_size, hidden_size, win_in_size, win_out_size)
        self.lstm2 = tf.keras.layers.LSTM(hidden_size, return_sequences=True, return_state=True)
        self.lstm3 = tf.keras.layers.LSTM(hidden_size, return_sequences=True, return_state=True)
        # Linear output: parse_parameters applies softmax / exp / tanh / sigmoid
        # itself, and a relu here would forbid negative means, correlations and
        # logits.
        self.mdn = tf.keras.layers.Dense(mdn_out_size)

    def _as_state_pair(self, state):
        """Normalise an initial state to the ``[h, c]`` list a Keras LSTM expects."""
        if isinstance(state, (tuple, list)):
            h, c = state
        else:
            h = c = state
        # Also accept states carrying a leading layer axis, e.g. (1, batch, hidden).
        shape = (-1, self.hidden_size)
        return [tf.reshape(h, shape), tf.reshape(c, shape)]

    def call(self, x, x_len, c_hot, c_len, w_prev, k_prev, prev1, prev2, prev3,
             batch_size, win_out_size, bias=0., y=None):
        """Compute the per-sequence training loss for one batch.

        Args:
            x: Stroke inputs, expected as (batch, timesteps, 3).
            x_len: Valid length of every stroke sequence, passed to ``loss``.
            c_hot: One-hot text passed to the attention layer.
            c_len: Text lengths, forwarded to the attention layer.
            w_prev: Initial window vector.
            k_prev: Initial window positions.
            prev1: Initial state of the attention LSTM.
            prev2: Initial state of the second LSTM, an ``(h, c)`` pair or a
                single tensor used for both.
            prev3: Initial state of the third LSTM, same conventions as prev2.
            batch_size: Unused; kept for interface compatibility.
            win_out_size: Unused; kept for interface compatibility.
            bias: Unused; kept for interface compatibility.
            y: Optional next-step targets aligned with ``x``. When omitted, the
                pair is derived from ``x`` itself (inputs ``x[:, :-1]``, targets
                ``x[:, 1:]``) instead of reading a notebook-level ``y`` whose
                batch axis covers the whole dataset rather than this batch.
                NOTE(review): ``x_len`` is passed through unchanged in that
                mode -- confirm whether it should be reduced by one.

        Returns:
            Tensor with one loss value per sequence in the batch.
        """
        if y is None:
            y = x[:, 1:, :]
            x = x[:, :-1, :]

        # LSTM 1 (with attention mechanism)
        hid1, prev1, win_vec, w_prev, k_prev, phi_prev = self.lstm1(
            x, c_hot, c_len, w_prev, k_prev, prev1)

        # LSTM 2, with skip connections from the input and the window vector
        lstm2_input = tf.concat([x, hid1, win_vec], axis=-1)
        hid2, _, _ = self.lstm2(lstm2_input, initial_state=self._as_state_pair(prev2))

        # LSTM 3, same skip connections
        lstm3_input = tf.concat([x, hid2, win_vec], axis=-1)
        hid3, _, _ = self.lstm3(lstm3_input, initial_state=self._as_state_pair(prev3))

        # Mixture-density head over the outputs of all three layers
        params = self.mdn(tf.concat([hid1, hid2, hid3], axis=-1))
        pis, mus, sigmas, rhos, es = parse_parameters(params, eps=1e-8, sigma_eps=1e-4)
        return loss(y, x_len, pis, mus, sigmas, rhos, es, eps=1e-8)

In [None]:
#Training the model
def train_conditional_model(input_data):
    """Train the conditional handwriting-synthesis model.

    The training data is read from the notebook-level arrays ``x``, ``x_len``,
    ``c_hot`` and ``c_len``.

    Args:
        input_data: Unused; kept for interface compatibility.

    Returns:
        The trained ``HandwritingSynthesis`` model.
    """
    batch_size = 32
    # Only full batches are used: the initial states below have a fixed batch size.
    num_of_batches = int(x.shape[0]) // batch_size
    K = 10
    hidden_size = 400
    num_epochs = 10

    model = HandwritingSynthesis(75, batch_size=batch_size, hidden_size=hidden_size)

    # Zero initial window positions and LSTM states. Keras LSTM states are
    # (batch, hidden); there is no leading layer axis.
    k_prev = tf.zeros((batch_size, K), dtype=tf.float32)
    h1 = c1 = tf.zeros((batch_size, hidden_size), dtype=tf.float32)
    h2 = c2 = tf.zeros((batch_size, hidden_size), dtype=tf.float32)
    h3 = c3 = tf.zeros((batch_size, hidden_size), dtype=tf.float32)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    for epoch in range(num_epochs):
        ct_loss = 0.0
        for i in range(0, batch_size * num_of_batches, batch_size):
            strks = x[i:i + batch_size]
            strks_m = x_len[i:i + batch_size]
            onehots = c_hot[i:i + batch_size]
            sent_m = c_len[i:i + batch_size]
            # `onehots` is already the batch slice; slicing it by `i` again
            # would yield an empty tensor for every batch after the first.
            w_prev = onehots[:, 0, :]

            with tf.GradientTape() as tape:
                # Reduce whatever the model returns to one scalar per batch.
                batch_loss = tf.reduce_mean(
                    model(strks, strks_m, onehots, sent_m, w_prev, k_prev,
                          (h1, c1), (h2, c2), (h3, c3), batch_size, 3 * K)
                )

            gradients = tape.gradient(batch_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            ct_loss += float(batch_loss)

        print(f"epoch {epoch + 1}/{num_epochs} - mean loss: {ct_loss / max(num_of_batches, 1):.4f}")

    return model

In [None]:
# Run training. The function reads its data from the notebook-level arrays and
# never uses its argument, so pass None instead of `input_data`, which is not
# defined in any cell and fails on a fresh kernel.
train_conditional_model(None)

ValueError: ignored