In [1]:
import tensorflow as tf

In [2]:
# --- Data limits ---
# Upper bound on the number of lines read from the corpus file.
NUM_SENTENCES = 20_000
# Passed as `num_words` to both tokenizers.
MAX_NUM_WORDS = 20_000
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
MAX_SENT_LEN = 50

# --- Model size ---
# Output dimension of the Embedding layers.
EMBEDDING_SIZE = 100
# Units of the encoder/decoder GRUs and of the attention projections.
GRU_NEURONS = 100

# --- Training schedule ---
BATCH_SIZE = 64
EPOCHS = 5

In [7]:
# Source sentences (encoder side) and target sentences wrapped in
# '<sos>' / '<eos>' markers (decoder side), index-aligned.
inputs = []
outputs = []

# `with` guarantees the file handle is released even if parsing raises.
with open('spa.txt', encoding='utf-8') as data_file:
    # Only the first NUM_SENTENCES lines of the file are considered; skipped
    # (malformed) lines still count towards that limit.
    for line_number, line in enumerate(data_file, start=1):
        if line_number > NUM_SENTENCES:
            break

        # Each usable line is "<source>\t<target>[\t<anything else>]".
        # Any columns after the first two are ignored, so a missing or extra
        # trailing column no longer aborts the whole load with a ValueError.
        fields = line.rstrip().split('\t')
        if len(fields) < 2:
            continue

        ip, temp_op = fields[0], fields[1]
        op = '<sos> ' + temp_op + ' <eos>'
        inputs.append(ip)
        outputs.append(op)

In [10]:
from keras.preprocessing.text import Tokenizer

# ---- Source-language tokenizer (default filters strip punctuation) ----
input_tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
input_tokenizer.fit_on_texts(inputs)

inputs_seq = input_tokenizer.texts_to_sequences(inputs)

inputs_word2index = input_tokenizer.word_index
print('Total unique words in input:', len(inputs_word2index))

# `word_index` lists every word seen during fitting, but `texts_to_sequences`
# only ever emits indices below `num_words`. Index 0 is reserved for padding,
# hence the +1. Clamping keeps the embedding table from being larger than the
# range of indices that can actually occur.
inputs_numwords = min(MAX_NUM_WORDS, len(inputs_word2index) + 1)

inputs_maxlen = max(len(s) for s in inputs_seq)
print('Length of longest sentence in input:', inputs_maxlen)

# ---- Target-language tokenizer ----
# filters='' disables character filtering so the '<sos>' / '<eos>' markers
# survive tokenization intact.
output_tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, filters='')
output_tokenizer.fit_on_texts(outputs)

outputs_seq = output_tokenizer.texts_to_sequences(outputs)

outputs_word2index = output_tokenizer.word_index
print('Total unique words in output:', len(outputs_word2index))

# Same clamping as for the source vocabulary; this value also sizes the
# decoder's output layer and the one-hot targets.
outputs_numwords = min(MAX_NUM_WORDS, len(outputs_word2index) + 1)

outputs_maxlen = max(len(s) for s in outputs_seq)
print('Length of longest sentence in output:', outputs_maxlen)

Total unique words in input: 3769
Length of longest sentence in input: 6
Total unique words in output: 10553
Length of longest sentence in output: 14


In [22]:
from keras_preprocessing.sequence import pad_sequences

# Encoder inputs use pad_sequences' default padding side ('pre'), so the zeros
# are placed in front of each sentence.
encoder_input_sequences = pad_sequences(inputs_seq, maxlen=inputs_maxlen)
print('encoder_input_sequences shape:', encoder_input_sequences.shape)

# Decoder inputs are padded at the end ('post') so every sequence starts with
# its '<sos>' token at position 0.
decoder_input_sequences = pad_sequences(outputs_seq, maxlen=outputs_maxlen, padding='post')
# Report the array that was actually built; the previous print referenced a
# name that is never defined in this notebook and failed on a fresh kernel.
print('decoder_input_sequences shape:', decoder_input_sequences.shape)

encoder_input_sequences shape: (20000, 6)
decoder_output_sequences shape: (20000, 14)


# BahdanauAttention

In [12]:
class BahdanauAttention(tf.keras.layers.Layer):
    """Additive attention: scores each step of `values` against one `query`.

    The score is ``V(tanh(W1(query) + W2(values)))``; scores are normalised
    with a softmax over axis 1 and used to form a weighted sum of `values`.
    """

    def __init__(self, units):
        """Create the three projections.

        Args:
            units: width of the `W1` / `W2` projections that are summed
                before the tanh.
        """
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, inputs):
        """Compute the attention context.

        Args:
            inputs: a two-element sequence ``(query, values)``.
                Assumes `query` is (batch, features) and `values` is
                (batch, steps, features) — TODO confirm against callers.

        Returns:
            `values` weighted by the attention distribution and summed
            over axis 1.
        """
        query, values = inputs

        # Insert an axis at position 1 so the projected query broadcasts
        # against every step of the projected values.
        projected_query = self.W1(tf.expand_dims(query, 1))
        projected_values = self.W2(values)

        # One scalar score per step (last axis has size 1).
        score = self.V(tf.nn.tanh(projected_query + projected_values))

        # Normalise across axis 1, then collapse that axis with a weighted sum.
        attention_weights = tf.nn.softmax(score, axis=1)
        return tf.reduce_sum(attention_weights * values, axis=1)

## ENCODER DECODER ARCHITECTURE

In [14]:
from tensorflow.keras.layers import Input,GRU,Dense,Embedding,Bidirectional

In [15]:
from keras import Model

In [19]:
# ---------------------------------------------------------------- Encoder
encoder_inputs = Input(shape=(inputs_maxlen,))
encoder_embed = Embedding(inputs_numwords, EMBEDDING_SIZE)(encoder_inputs)
encoder_gru = Bidirectional(GRU(GRU_NEURONS, return_sequences=True, return_state=True))
# encoder_op: per-step outputs of both directions; forward / backward: the
# final state of each direction.
encoder_op, forward, backward = encoder_gru(encoder_embed)
# Squash the two concatenated final states down to GRU_NEURONS so the result
# can be used as the initial state of the (unidirectional) decoder GRU.
encoder_dense = Dense(GRU_NEURONS)
hidden = tf.nn.tanh(encoder_dense(tf.concat([forward, backward], axis=-1)))

# ---------------------------------------------------------------- Decoder
decoder_inputs = Input(shape=(outputs_maxlen,))
decoder_embed = Embedding(outputs_numwords, EMBEDDING_SIZE)(decoder_inputs)

# The context is computed once, with the encoder summary `hidden` as query,
# and then repeated for every decoder time step.
attention = BahdanauAttention(GRU_NEURONS)
context_vector = attention([hidden, encoder_op])
context_vector = tf.expand_dims(context_vector, 1)
context_vector = tf.tile(context_vector, [1, tf.shape(decoder_embed)[1], 1])

# Keep `encoder_op` itself untouched: other cells reuse it as the encoder's
# sequence output, and rebinding it to the transposed tensor made them receive
# a tensor with the last two axes swapped.
encoder_op_transposed = tf.transpose(encoder_op, perm=(0, 2, 1))
decoder_combined = tf.matmul(context_vector, encoder_op_transposed)
weighted = tf.concat([decoder_embed, context_vector], axis=2)

decoder_gru = GRU(GRU_NEURONS, return_sequences=True, return_state=True)
decoder_op, decoder_state = decoder_gru(weighted, initial_state=hidden)

ouput = tf.concat([decoder_op, decoder_combined, decoder_embed], axis=2)

# Softmax so the model emits a probability distribution over the target
# vocabulary: a loss selected by the string 'categorical_crossentropy'
# interprets predictions as probabilities, not logits.
dec_op = Dense(outputs_numwords, activation='softmax')(ouput)

model = Model([encoder_inputs, decoder_inputs], dec_op)

In [20]:
# Print the training model's layer table (layer type, output shape,
# parameter count, inbound connections) as a sanity check of the wiring.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_6 (InputLayer)        [(None, 6)]                  0         []                            
                                                                                                  
 embedding_5 (Embedding)     (None, 6, 100)               377000    ['input_6[0][0]']             
                                                                                                  
 bidirectional_3 (Bidirecti  [(None, 6, 200),             121200    ['embedding_5[0][0]']         
 onal)                        (None, 100),                                                        
                              (None, 100)]                                                        
                                                                                              

In [23]:
# Teacher forcing: the target at step t must be the token that FOLLOWS the
# decoder input at step t. Dropping the leading token of every target sequence
# and re-padding to the same length shifts the targets one step to the left;
# using the decoder inputs themselves as targets would only teach the model to
# copy its input.
decoder_target_sequences = pad_sequences(
    [seq[1:] for seq in outputs_seq],
    maxlen=outputs_maxlen,
    padding='post',
)

# One-hot encode along a new last axis of size `outputs_numwords`.
# NOTE(review): this materialises sentences x steps x vocabulary floats, which
# can be very large — consider integer targets with a sparse loss.
decoder_outputs_onehot = tf.one_hot(decoder_target_sequences, depth=outputs_numwords)

In [25]:
# Configure training: RMSprop optimizer, categorical cross-entropy against the
# one-hot targets, and accuracy as the reported metric.
# NOTE(review): a loss given by this string treats predictions as
# probabilities — confirm the model's last layer applies a softmax.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [26]:
# Fit on (source sequence, decoder input sequence) pairs; the last 10% of the
# samples are held out for validation. `trn` keeps the returned History.
trn = model.fit(
    x=[encoder_input_sequences, decoder_input_sequences],
    y=decoder_outputs_onehot,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=0.1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [27]:
# Inference-time encoder: source token ids -> (per-step encoder outputs,
# initial decoder state).
# The sequence output is read straight from the Bidirectional layer (first of
# its three outputs) rather than from the name `encoder_op`, so this cell does
# not depend on whether another cell has rebound that name (e.g. to a
# transposed tensor).
encoder_sequence_output = encoder_gru.output[0]
encoder_model = Model(encoder_inputs, [encoder_sequence_output, hidden])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None".
encoder_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_6 (InputLayer)        [(None, 6)]                  0         []                            
                                                                                                  
 embedding_5 (Embedding)     (None, 6, 100)               377000    ['input_6[0][0]']             
                                                                                                  
 bidirectional_3 (Bidirecti  [(None, 6, 200),             121200    ['embedding_5[0][0]']         
 onal)                        (None, 100),                                                        
                              (None, 100)]                                                        
                                                                                            

In [30]:
# ------------------------------------------------------------------
# Single-step inference decoder.
#
# Inputs : previous target token id, encoder sequence outputs, previous
#          decoder GRU state.
# Outputs: the output layer's prediction for the next token, new GRU state.
#
# Every trained layer is REUSED here; only the Input placeholders are new.
# No name from the training graph (`hidden`, `encoder_op`, `decoder_embed`,
# `dec_op`, `outputs`) is rebound, so earlier cells stay re-runnable.
# ------------------------------------------------------------------

# Recover the trained layers from the tensors they produced in the training
# graph (the layers themselves were never bound to names).
decoder_embedding_layer = decoder_embed.node.layer
output_dense_layer = dec_op.node.layer

# Placeholders fed at prediction time.
decoder_state_input = Input(shape=(GRU_NEURONS,))
decoder_input_states = [decoder_state_input]
# The bidirectional encoder concatenates both directions, hence 2 * GRU_NEURONS.
encoder_outputs_input = Input(shape=(inputs_maxlen, 2 * GRU_NEURONS))
decoder_input_word = Input(shape=(1,))

decoder_input_word_emb = decoder_embedding_layer(decoder_input_word)

# The attention layer expects the query as a tensor, not a list wrapping it.
# NOTE(review): training computes the context once from the encoder summary
# state; here it is recomputed from the current decoder state on every step,
# which matches training only on the first step — confirm this is intended.
step_context = attention([decoder_state_input, encoder_outputs_input])
# Add the length-1 time axis; no tiling is needed for a single step.
step_context = tf.expand_dims(step_context, 1)

step_combined = tf.matmul(
    step_context,
    tf.transpose(encoder_outputs_input, perm=(0, 2, 1)),
)
step_weighted = tf.concat([decoder_input_word_emb, step_context], axis=2)

step_decoder_op, h = decoder_gru(step_weighted, initial_state=decoder_input_states)
decoder_states = [h]

# Same feature layout as in training: GRU output, context/encoder product,
# token embedding.
step_features = tf.concat(
    [step_decoder_op, step_combined, decoder_input_word_emb],
    axis=2,
)
decoder_step_output = output_dense_layer(step_features)

decoder_model = Model(
    [decoder_input_word, encoder_outputs_input] + decoder_input_states,
    [decoder_step_output] + decoder_states,
)