To-Do
- [ ] Create Separate Functions for each task
- [ ] Evaluation of our model
- [ ] How to generalize the model?
- [ ] More about preprocessing
- [ ] Improve Handling of rare words

In [1]:
!pip install keras-preprocessing

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
!pip install -U tensorflow-text==2.11.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import numpy as np
import pathlib
from keras.utils import to_categorical
import pickle
import tensorflow as tf
import tensorflow_text as tf_text
from keras.models import Model, Sequential
from keras.layers import Input, LSTM, Dense, Embedding, SimpleRNN, RepeatVector, TimeDistributed

In [4]:
tf.__version__

'2.11.1'

## English to Spanish

In [5]:
import pathlib

path_to_zip = tf.keras.utils.get_file(
    'spa-eng.zip', origin='http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip',
    extract=True)

path_to_file = pathlib.Path(path_to_zip).parent/'spa-eng/spa.txt'

In [6]:
def load_data(path):
  text = path.read_text(encoding='utf-8')

  lines = text.splitlines()
  pairs = [line.split('\t') for line in lines]

  context = np.array([context for target, context in pairs])
  target = np.array([target for target, context in pairs])

  return target, context

data = load_data(path_to_file)

In [7]:
inputs = np.array(data[0][:10000])
outputs = np.array(data[1][:10000])

In [8]:
# vectorizer = tf.keras.layers.TextVectorization(standardize="lower_and_strip_punctuation", output_sequence_length=src)
# text_dataset = tf.data.Dataset.from_tensor_slices(inputs)
# vectorizer.adapt(text_dataset)

In [9]:
# # dump(vectorizer,open('drive/MyDrive/Machine Translation/vectroizer.pkl','wb'))
# import pickle
# pickle.dump({'config': vectorizer.get_config(),
#              'weights': vectorizer.get_weights()}
#             , open("drive/MyDrive/Machine Translation/vectorizer.pkl", "wb"))

In [10]:

# saved = pickle.load(open('drive/MyDrive/Machine Translation/vectorizer.pkl','rb'))
# vectorizer = tf.keras.layers.TextVectorization.from_config(saved['config'])

# vectorizer.adapt(tf.data.Dataset.from_tensor_slices(['random']))
# vectorizer.set_weights(saved['weights'])

In [11]:
# spanish_vectorizer = tf.keras.layers.TextVectorization(standardize="lower_and_strip_punctuation", output_sequence_length=tar)
# text_dataset = tf.data.Dataset.from_tensor_slices(outputs)
# spanish_vectorizer.adapt(text_dataset)

In [12]:
# import pickle
# pickle.dump({'config': spanish_vectorizer.get_config(),
#              'weights': spanish_vectorizer.get_weights()}
#             , open("drive/MyDrive/Machine Translation/spanish_vectorizer.pkl", "wb"))

In [13]:
# saved = pickle.load(open('drive/MyDrive/Machine Translation/spanish_vectorizer.pkl','rb'))
# spanish_vectorizer = tf.keras.layers.TextVectorization.from_config(saved['config'])

# spanish_vectorizer.adapt(tf.data.Dataset.from_tensor_slices(['random']))
# spanish_vectorizer.set_weights(saved['weights'])

In [14]:
# train_X = vectorizer(inputs[indices])
# train_Y = spanish_vectorizer(outputs[indices])
# # model.fit(train_X, train_Y)

In [15]:
def create_tokenizer(source, target, num_words=5000, lower=True):
  src_tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=num_words,lower=lower)
  tar_tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=num_words,lower=lower)

  src_tokenizer.fit_on_texts(source)
  tar_tokenizer.fit_on_texts(target)
  return src_tokenizer, tar_tokenizer

In [16]:
eng_tokenizer,spa_tokenizer = create_tokenizer(inputs, outputs)

In [17]:
def transform_data(source, target):
  source = eng_tokenizer.texts_to_sequences(inputs)
  source = tf.keras.preprocessing.sequence.pad_sequences(source, maxlen=8, padding='post')

  target = spa_tokenizer.texts_to_sequences(outputs)
  target = tf.keras.preprocessing.sequence.pad_sequences(target, maxlen=8, padding='post')
  return source, target

def split_data(source, target, train_split=0.8):
  size = inputs.shape[0]
  indices = np.random.randint(0, size, size=(int(size*train_split),))

  trainX = source[indices]
  trainY = target[indices]

  mask = np.ones(size,bool)
  mask[indices] = False
  testX = source[mask]
  testY = target[mask]
  return trainX, trainY, testX, testY

source, target = transform_data(inputs, outputs)
trainX, trainY, testX, testY = split_data(source, target, 0.8)

In [18]:
def encode_output(sequences, vocab_size):
 ylist = list()
 for sequence in sequences:
  encoded = tf.keras.utils.to_categorical(sequence, num_classes=vocab_size)
  ylist.append(encoded)
 y = np.array(ylist)
 y = y.reshape(sequences.shape[0], sequences.shape[1], vocab_size)
 return y

trainY = encode_output(trainY, len(spa_tokenizer.word_index)+1)

In [19]:
tar_vocab = len(spa_tokenizer.word_index)+1
src_vocab = len(eng_tokenizer.word_index)+1
src = max(len(line.split()) for line in inputs)
tar = max(len(line.split()) for line in outputs)

In [20]:
layer1 = Embedding(src_vocab, 16, input_length=8)
layer2 = LSTM(256)
model = Sequential()
model.add(layer1)
model.add(layer2)

# we want to replicate the context vector for each time step
model.add(RepeatVector(8))
model.add(LSTM(256, return_sequences=True))

# converting decoder output to our desired sequence format
model.add(TimeDistributed(Dense(tar_vocab, activation='softmax')))
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [21]:
trainX[0,:]

array([  13, 1570,    0,    0,    0,    0,    0,    0], dtype=int32)

In [31]:
# creating an encoder class
class Encoder(tf.keras.Model):
  def __init__(self, emb_vocab, emb_dim, units):
    super(Encoder, self).__init__()
    self.emb_vocab = emb_vocab
    self.emb_dim = emb_dim
    self.units = units

    self.embedding = Embedding(self.emb_vocab, self.emb_dim, input_length=8)
    self.feature_extractor = tf.keras.layers.Bidirectional(merge_mode='sum', layer=tf.keras.layers.GRU(self.units,return_sequences=True,return_state=True,recurrent_initializer='glorot_uniform'))

  def call(self,input):
    emb = self.embedding(input)
    enc_output, enc_hidden, _ = self.feature_extractor(emb)

    return enc_output, enc_hidden

In [32]:
enc = Encoder(src_vocab,16,256)

In [33]:
trainX.shape

(8000, 8)

In [34]:
enc_output, enc_hiddne = enc(trainX)

In [26]:
np.array(enc_output[0,:,:]).reshape(8,-1).shape

(8, 256)

In [27]:
w = tf.nn.softmax(Dense(1,activation='tanh')(np.array(enc_output[:,:,:]).reshape(-1,8,256)))

In [28]:
np.array(w).shape

(8000, 8, 1)

In [29]:
enc_output.shape

TensorShape([8000, 8, 256])

In [30]:
weights = np.array(tf.nn.softmax(np.array(w).reshape(-1,1,8)))

In [31]:
class Attention(tf.keras.layers.Layer):
  def __init__(self, units ):
    super(Attention, self).__init__()
    self.units = units
    self.alignment = Dense(1, activation='tanh')
    self.attention_weights = None

  def call(self, enc_output, dec_hidden_state):
    energy = self.alignment(enc_output)
    self.attention_weights = tf.nn.softmax(tf.reshape(energy, [-1,1,8]))
    context_vector = tf.matmul(self.attention_weights,enc_output)
    return context_vector, self.attention_weights


In [32]:
att = Attention(256)
context, weights = att(enc_output, None)
context.shape

TensorShape([8000, 1, 256])

In [33]:
class Decoder(tf.keras.Model):
  def __init__(self, units, tar_vocab):
    super(Decoder, self).__init__()
    self.units = units
    self.tar_vocab = tar_vocab
    self.attention = Attention(units)
    self.dec_cell = LSTM(256, return_sequences=True, return_state=True)
    self.output_func = Dense(self.tar_vocab)
    
  def call(self, output, enc_output, hidden):
    context, attention_weights = self.attention(enc_output, hidden)

    # concat the output and the context vector
    dec_input = tf.concat([context, output], axis=-1)
    output, hidden_state,_ = self.dec_cell(dec_input)
    output = self.output_func(output)

    return output, hidden_state, attention_weights
    

In [34]:
output = trainY[:,0,:].reshape(-1,1,4961)

In [35]:
dec = Decoder(256, tar_vocab)
dec_output, hidden_state, attention_weights = dec(output,enc_output,None)

In [36]:
dec_output.shape

TensorShape([8000, 1, 4961])

In [37]:
def loss(y_true, y_pred):
  loss_fun = tf.keras.losses.SparseCategoricalCorssentropy()
  loss = loss_fun(y_true, y_pred)

  return loss

In [38]:
optimizer = tf.keras.optimizers.Adam()

In [39]:
dec = Decoder(256, tar_vocab)

In [46]:
@tf.function
def training_step(input, y):
  loss = 0
  # enc = Encoder(tar_vocab, 16, 256)
  # dec = Decoder(256, tar_vocab)
  with tf.GradientTape() as tape:
    enc_output, enc_hidden = enc(input)
    dec_hidden = enc_hidden
    
    output = tf.reshape(y[:,0,:], [-1,1,4961])
    print('hello')
    for i in range(1, y.shape[1]):
      dec_output, hidden_state, attention_weights = dec(output, enc_output, dec_hidden)
      loss += tf.keras.losses.CategoricalCrossentropy()(tf.reshape(y[:,i,:], (-1,1,4961)),dec_output)
    batch_loss = loss/int(y.shape[1])
    # print(loss, batch_loss)
    # variables = enc.trainable_variables + dec.trainable_variables
    variables = enc.trainable_variables + dec.trainable_variables + dec.attention.variables

    gradients = tape.gradient(loss, variables)
    # optimizer.build(variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss

In [67]:
def train(epochs, optimizer=tf.keras.optimizers.Adam(), trainX=trainX, trainY=trainY, epochs_per_step=32):
  dataset = tf.data.Dataset.from_tensor_slices((trainX, trainY))
  length = trainX.shape[0]
  batches = int(length/epochs_per_step)
  losses = []
  for epoch in range(50):
    start = time.time()
    epoch_loss = 0
    for batch in range(batches):
      indices_start = batch*epochs_per_step
      indices_end = indices_start+epochs_per_step
      input = trainX[indices_start:indices_end]
      tar = trainY[indices_start:indices_end]
      # print(input.shape)
      # print(tar.shape)
      # break
      batch_loss = training_step(input, tar)
      epoch_loss += batch_loss
    
    losses.append(epoch_loss/batches)
    print(f"Epoch {epoch+1} loss {epoch_loss/batches}")
    print(time.time()-start)
  return losses
    

In [68]:
import time
start = time.time()
losses = train(10)
end = time.time()
end-start

Epoch 1 loss 10.791194915771484
6.24383282661438
Epoch 2 loss 10.864636421203613
5.6867029666900635
Epoch 3 loss 10.297652244567871
5.833936452865601
Epoch 4 loss 10.426053047180176
6.16764760017395
Epoch 5 loss 11.34715747833252
5.648762941360474
Epoch 6 loss 10.3530855178833
6.228830099105835
Epoch 7 loss 9.13405704498291
5.696537494659424
Epoch 8 loss 8.522539138793945
5.733081340789795
Epoch 9 loss 8.922370910644531
6.1769609451293945
Epoch 10 loss 8.295306205749512
5.596759557723999
Epoch 11 loss 8.287266731262207
6.001916408538818
Epoch 12 loss 7.410953998565674
5.672987461090088
Epoch 13 loss 8.080018043518066
6.408250093460083
Epoch 14 loss 7.115900039672852
6.0370728969573975
Epoch 15 loss 7.766013145446777
5.616457939147949
Epoch 16 loss 6.580559730529785
6.092764616012573
Epoch 17 loss 6.037989139556885
5.673971891403198
Epoch 18 loss 5.450923442840576
5.634390115737915
Epoch 19 loss 4.9709367752075195
6.062699794769287
Epoch 20 loss 6.567409038543701
5.578620672225952
Epoch

294.55402755737305

In [69]:
losses

[<tf.Tensor: shape=(), dtype=float32, numpy=10.791195>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.864636>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.297652>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.426053>,
 <tf.Tensor: shape=(), dtype=float32, numpy=11.3471575>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.3530855>,
 <tf.Tensor: shape=(), dtype=float32, numpy=9.134057>,
 <tf.Tensor: shape=(), dtype=float32, numpy=8.522539>,
 <tf.Tensor: shape=(), dtype=float32, numpy=8.922371>,
 <tf.Tensor: shape=(), dtype=float32, numpy=8.295306>,
 <tf.Tensor: shape=(), dtype=float32, numpy=8.287267>,
 <tf.Tensor: shape=(), dtype=float32, numpy=7.410954>,
 <tf.Tensor: shape=(), dtype=float32, numpy=8.080018>,
 <tf.Tensor: shape=(), dtype=float32, numpy=7.1159>,
 <tf.Tensor: shape=(), dtype=float32, numpy=7.766013>,
 <tf.Tensor: shape=(), dtype=float32, numpy=6.5805597>,
 <tf.Tensor: shape=(), dtype=float32, numpy=6.037989>,
 <tf.Tensor: shape=(), dtype=float32, numpy=5.4509234>,
 <

In [None]:
class translator(tf.keras.Model):
  def __init__(self, emb_vocab, emb_dim, units, tar_vocab):
    super().__init__()

    self.encoder = Encoder(emb_vocab, emb_dim, units)
    self.decoder = Encoder(units, tar_vocab)

  def call(self,inputs):
    x = self.encoder(inputs)
    x = self.decoder(outputs)

In [None]:
trainX.shape

(8000, 8)

In [58]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 8, 16)             37888     
                                                                 
 lstm_4 (LSTM)               (None, 256)               279552    
                                                                 
 repeat_vector_1 (RepeatVect  (None, 8, 256)           0         
 or)                                                             
                                                                 
 lstm_5 (LSTM)               (None, 8, 256)            525312    
                                                                 
 time_distributed_1 (TimeDis  (None, 8, 4961)          1274977   
 tributed)                                                       
                                                                 
Total params: 2,117,729
Trainable params: 2,117,729
No

In [59]:
trainY.shape

(8000, 8, 4961)

In [60]:
y_true = np.array([[[0,1,0],[1,0,0]],[[1,0,0],[0,0,1]]])
y_pred = np.array([[[0.01,0.95,0.04],[0.97,0.02,0.01]],[[0.96,0.02,0.02],[0.02,0.03,0.95]]])
y_true.shape, y_pred.shape

((2, 2, 3), (2, 2, 3))

In [61]:
# y_true = np.array([[[0, 1, 0], [0, 0, 1]]])
# print(y_true.shape)
# y_pred = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1]]]
# Using 'auto'/'sum_over_batch_size' reduction type.
cce = tf.keras.losses.CategoricalCrossentropy()
cce(y_true[1,:,:], y_pred[1,:,:]).numpy()

0.04605764445390287

In [66]:
model.fit(trainX, trainY, epochs=30, batch_size=64)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f32af328dc0>

In [None]:
outputs = [layer.output for layer in model.layers]

In [None]:
y_pred = model.predict(trainX)



In [None]:
loss = tf.keras.losses.CategoricalCrossentropy()

In [None]:
trainY[0, :,:].shape

(8, 4961)

In [None]:
def inference(model, input, source_tokenizer, tar_token_to_word):
  """
  Args:
  input - a string in the source language
  """
  # tokenized = source_tokenizer.texts_to_sequences(input)
  # test = tf.keras.preprocessing.sequence.pad_sequences(tokenized, maxlen=8, padding='post')
  prediction = model.predict(input)

  output = [np.argmax(vector) for vector in prediction[0]]

  output_list = []
  for i in output:
    if i == 0:
      break
    else:
      output_list.append(tar_token_to_word[i])

  output_sentence = ' '.join(output_list)
  return output_sentence, output

eng_word_to_token = eng_tokenizer.word_index
eng_token_to_word = {token:word for word, token in eng_word_to_token.items()}

spa_word_to_token = spa_tokenizer.word_index
spa_token_to_word = {token:word for word, token in spa_word_to_token.items()}

In [None]:
sent, output = inference(model, trainX[123:134], eng_tokenizer, spa_token_to_word)

In [None]:
layers = [layer for layer in model.layers]

In [None]:
layers[1]()

In [None]:
[layer.input for layer in model.layers]

In [None]:
enc_output.shape

In [None]:
emb = np.array(layers[0](np.array(trainX[0:12])))
layers[-1](np.array(enc_output[0,0,:]).reshape(1,1,-1)).shape

In [None]:
np.array(enc_output[0,0,:]).shape

In [None]:
layer = LSTM(128, input_shape=(8,256), return_sequences=True, return_state=True)
layer(np.array(enc_output[0,0,:]).reshape(1,1,256))

[<tf.Tensor: shape=(1, 1, 128), dtype=float32, numpy=
 array([[[-1.4551018e-03, -8.4214687e-04,  9.9801912e-04,  4.0674073e-04,
           6.3562952e-04,  1.3461038e-03, -2.8510811e-04, -2.5650763e-04,
          -1.5970368e-04,  1.4687947e-03,  2.4260647e-04, -4.5335016e-04,
          -4.2363498e-04,  1.6707971e-05,  3.9971250e-04, -7.5283658e-04,
           1.7399113e-03,  8.7354629e-04, -7.6641125e-04, -4.5693957e-04,
           3.0355118e-04,  1.4359169e-03,  1.4759449e-04, -7.6561468e-05,
           3.3676540e-04,  2.0027789e-04,  4.0466568e-04,  5.7189236e-04,
          -6.0488546e-04, -1.1757636e-04, -1.5828786e-04, -1.3522982e-03,
          -1.1456797e-03, -6.0270802e-04, -1.8989336e-03,  7.4525026e-04,
          -1.3446304e-04, -7.1785349e-04,  1.3909987e-03,  6.1527395e-04,
           1.6450656e-04, -6.6929171e-04,  3.5361017e-04, -1.4782239e-03,
          -3.5549681e-05,  1.0267527e-04, -1.1837718e-03, -7.3577208e-04,
           5.3062837e-04,  4.1731278e-04,  1.0159778e-03, 

## Eng to Hin (Incomplete)

In [22]:
class encoder(tf.keras.Model):
  def __init__(self, src_vocab):
    super(encoder, self).__init__()

    self.emb_layer = Embedding(src_vocab, 16, input_length=8)
    self.encoder_layer = LSTM(256)

  def call(self, inputs):
    # print(self.emb_layer)
    x = self.emb_layer(inputs)
    x = self.encoder_layer(x)
    return x


In [23]:
enc = encoder(src_vocab)

In [24]:
enc_output = enc(trainX)

In [25]:
enc_output.shape

TensorShape([8000, 256])

In [26]:
class Decoder(tf.keras.Model):
  def __init__(self, units, tar_vocab):
    super(Decoder, self).__init__()

    self.units = units
    self.tar_vocab = tar_vocab

    self.repeat = RepeatVector(8)
    self.decoder_layer = LSTM(self.units, return_sequences=True)
    self.dense = TimeDistributed(Dense(self.tar_vocab, activation='softmax'))

  def call(self, inputs):
    x = self.repeat(inputs)
    x = self.decoder_layer(x)
    x = self.dense(x)
    return x

In [27]:
dec = Decoder(256, tar_vocab)
dec(enc_output).shape

TensorShape([8000, 8, 4961])

In [34]:
class translator(tf.keras.Model):
  def __init__(self, src_vocab, tar_vocab):
    super(translator, self).__init__()
    self.src_vocab = src_vocab
    self.tar_vocab = tar_vocab

    self.encoder = encoder(self.src_vocab)
    self.decoder = Decoder(256, self.tar_vocab)

  def call(self, inputs):
    trainx = inputs
    x = self.encoder(trainx)
    x = self.decoder(x)

    return x

In [35]:
model = translator(src_vocab, tar_vocab)

In [36]:
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [38]:
model.fit(trainX, trainY, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f305a932ee0>

In [None]:
with open('hin.txt') as f:
    lines = f.readlines()

In [None]:
len(lines)

In [None]:
def prepare_data(lines):
  inputs = []
  outputs = []

  for i in range(len(lines)):
    src, target, _ = lines[i].split("\t")
    inputs.append(src)
    outputs.append(target)
  
  inputs = np.array(inputs)
  outputs = np.array(outputs)
  return inputs, outputs

inputs, outputs = prepare_data(lines)

In [None]:
inputs.shape, outputs.shape

In [None]:
eng_tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=5000,lower=True)
hin_tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=5000,lower=True)

eng_tokenizer.fit_on_texts(inputs)
hin_tokenizer.fit_on_texts(outputs)

trainX = eng_tokenizer.texts_to_sequences(inputs)
trainX = tf.keras.preprocessing.sequence.pad_sequences(trainX, maxlen=8, padding='post')

trainY = hin_tokenizer.texts_to_sequences(outputs)
trainY = tf.keras.preprocessing.sequence.pad_sequences(trainY, maxlen=8, padding='post')

def encode_output(sequences, vocab_size):
 ylist = list()
 for sequence in sequences:
  encoded = tf.keras.utils.to_categorical(sequence, num_classes=vocab_size)
  ylist.append(encoded)
 y = np.array(ylist)
 y = y.reshape(sequences.shape[0], sequences.shape[1], vocab_size)
 return y

trainY = encode_output(trainY[:9000], len(hin_tokenizer.word_index)+1)

In [None]:
len(hin_tokenizer.word_index)

In [None]:
tar_vocab = len(hin_tokenizer.word_index)+1
src_vocab = len(eng_tokenizer.word_index)+1
src = max(len(line.split()) for line in inputs)
tar = max(len(line.split()) for line in outputs)

In [None]:
layer1 = Embedding(src_vocab, 16, input_length=8)
layer2 = LSTM(256)
model = Sequential()
model.add(layer1)
model.add(layer2)

# we want to replicate the context vector for each time step
model.add(RepeatVector(8))
model.add(LSTM(256, return_sequences=True))

# converting decoder output to our desired sequence format
model.add(TimeDistributed(Dense(tar_vocab, activation='softmax')))
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [None]:
model.summary()

In [None]:
trainX.shape

In [None]:
!pip install tensorflow-addons==0.16.1

In [None]:
import tensorflow_addons as tfa

In [None]:
bi_model = Sequential()
bi_model.add(Embedding(src_vocab, 16, input_length=8))
bi_model.add(tf.keras.layers.Bidirectional(LSTM(256, return_sequences=True), input_shape=(8,16)))
# bi_model.add(tf.keras.layers.Attention(256))
bi_model.add(LSTM(256, return_sequences=True))
bi_model.add(TimeDistributed(Dense(tar_vocab, activation='softmax')))
bi_model.compile(optimizer='adam', loss='categorical_crossentropy')
bi_model.summary()

In [None]:
trainY.shape

In [None]:
bi_model.fit(trainX, trainY, epochs=250, batch_size=64)

In [None]:
predictions = bi_model.predict(trainX)

In [None]:
predictions.shape

In [None]:
[np.argmax(vector) for vector in predictions[14]]

In [None]:
model.fit(trainX, trainY, epochs=250, batch_size=64)

In [None]:
input = trainX[121:528]

In [None]:
# mapping tokens to words and vice-versa for both source and the target
eng_word_to_token = eng_tokenizer.word_index
eng_token_to_word = {token:word for word, token in eng_word_to_token.items()}

hin_word_to_token = hin_tokenizer.word_index
hin_token_to_word = {token:word for word, token in hin_word_to_token.items()}

In [None]:
def inference(model, input, source_tokenizer, tar_token_to_word):
  """
  Args:
  input - a string in the source language
  """
  tokenized = source_tokenizer.texts_to_sequences(input)
  input = tf.keras.preprocessing.sequence.pad_sequences(tokenized, maxlen=8, padding='post')
  prediction = model.predict(input)

  # print(input)
  input_list = []
  for i in input[0]:
    if i == 0:
      break
    else:
      input_list.append(eng_token_to_word[i])
  print("Input: ", ' '.join(input_list))

  output = [np.argmax(vector) for vector in prediction[0]]

  output_list = []
  for i in output:
    if i == 0:
      break
    else:
      output_list.append(tar_token_to_word[i])

  output_sentence = ' '.join(output_list)
  return output_sentence

input = ["how you doing"]
inference(bi_model, input, eng_tokenizer, hin_token_to_word)