In [1]:
import keras

In [2]:
import tensorflow as tf

In [3]:
from tensorflow.keras.layers import GRU,Embedding,Dense,Input

In [4]:
from keras import Model

In [73]:
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        
    def call(self, inputs):
        query,values=inputs
        query_with_time_axis = tf.expand_dims(query, 1)
        score1=self.W1(query_with_time_axis)
        score2=self.W2(values)
        combined_score=tf.nn.tanh(score1 + score2)
        score = self.V(combined_score)
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector

In [74]:
encoder_inputs=Input(shape=(max_inp_len,))
decoder_inputs=Input(shape=(1,))

encoder_embedded=Embedding(src_vocab_size,embedding_dim)
decoder_embedded=Embedding(trg_vocab_size,embedding_dim)

encoder_embed=encoder_embedded(encoder_inputs)

decoder_embed=decoder_embedded(decoder_inputs)

encoder_gru=GRU(gru_units,return_state=True,return_sequences=True,recurrent_initializer='glorot_uniform')
encoder_op,hidden=encoder_gru(encoder_embed)

attention=BahdanauAttention(gru_units)
context_vector=attention([hidden,encoder_op])
decoder_embed = tf.concat([tf.expand_dims(context_vector, 1), decoder_embed], axis=-1)
decoder_gru=GRU(gru_units,return_state=True,return_sequences=True,recurrent_initializer='glorot_uniform')
output,h1=decoder_gru(decoder_embed,initial_state=hidden)
output = tf.reshape(output, (-1, output.shape[2]))

decoder_fc=Dense(trg_vocab_size,activation="softmax")
decoder_op=decoder_fc(output)

In [75]:
encoder_model=Model(encoder_inputs,[encoder_op,hidden])

In [76]:
decoder_model=Model([decoder_inputs,encoder_op,hidden],[decoder_op,h1])

In [77]:
encoder_model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_19 (InputLayer)       [(None, 5)]               0         
                                                                 
 embedding_8 (Embedding)     (None, 5, 150)            208800    
                                                                 
 gru_8 (GRU)                 [(None, 5, 100),          75600     
                              (None, 100)]                       
                                                                 
Total params: 284400 (1.08 MB)
Trainable params: 284400 (1.08 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [78]:
decoder_model.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_22 (InputLayer)       [(None, 100)]                0         []                            
                                                                                                  
 input_21 (InputLayer)       [(None, 5, 100)]             0         []                            
                                                                                                  
 bahdanau_attention_4 (Bahd  (None, 100)                  20301     ['input_22[0][0]',            
 anauAttention)                                                      'input_21[0][0]']            
                                                                                                  
 input_20 (InputLayer)       [(None, 1)]                  0         []                      

In [79]:
optimizer=keras.optimizers.Adam(learning_rate=0.7,clipnorm=4)

In [80]:
def train_model(inputs):
    input_seq, target_seq= inputs
    input_target_seq=target_seq[:,:-1]
    target_target_seq=target_seq[:,1:]
    enc_output, enc_hidden = encoder_model(input_seq)
    dec_hidden = enc_hidden
    loss = 0
    for t in range(target_seq.shape[1]-1):
        predictions, dec_hidden = decoder_model([input_target_seq[:, t], enc_output, dec_hidden])
        loss += tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(target_target_seq[:, t], predictions, 
                                                                               from_logits=False))
        batch_loss = (loss / int(target_seq.shape[1]))
    return batch_loss

In [82]:
NUM_EPOCHS =5
for epoch in range(NUM_EPOCHS):
    total_loss = 0.0
    with tf.GradientTape() as tape:
        loss = train_model([src_sequences,tar_sequences])
        total_loss += loss

    grads = tape.gradient(total_loss, encoder_model.trainable_variables + decoder_model.trainable_variables)
    optimizer.apply_gradients(zip(grads, encoder_model.trainable_variables + decoder_model.trainable_variables))
    print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {total_loss.numpy():.4f}")

Epoch 1/5, Loss: 18.1980
Epoch 2/5, Loss: 8.4369
Epoch 3/5, Loss: 7.7339
Epoch 4/5, Loss: 12.4335
Epoch 5/5, Loss: 16.7369


In [62]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [63]:
inputs = []
outputs = []

data_file = open('spa.txt', encoding='utf-8')

count = 0
for line in data_file:
    count += 1
    if count > 5000:
        break
    if '\t' not in line:
        continue
    ip, temp_op, extra = line.rstrip().split('\t')
    op = temp_op
    inputs.append(ip)
    outputs.append(op)

In [64]:
inputs=[sentence.lower() for sentence in inputs]
outputs=[sentence.lower() for sentence in outputs]

In [65]:
outputs = ['<start> '+sentence+' <end>' for sentence in outputs]

In [66]:
embedding_dim=150
gru_units=100

In [67]:
EnglishTokenizer=Tokenizer(oov_token="<UNK>")
EnglishTokenizer.fit_on_texts(inputs)
inp_sequences=EnglishTokenizer.texts_to_sequences(inputs)
max_inp_len=max(len(i) for i in inp_sequences)
src_sequences=pad_sequences(inp_sequences,maxlen=max_inp_len,padding="post")
Englishword2index=EnglishTokenizer.word_index
Englishindex2word=EnglishTokenizer.index_word

In [68]:
SpanishTokenizer=Tokenizer(oov_token="<UNK>")
SpanishTokenizer.fit_on_texts(outputs)
op_sequences=SpanishTokenizer.texts_to_sequences(outputs)
max_tar_len=max(len(i) for i in op_sequences)
tar_sequences=pad_sequences(op_sequences,maxlen=max_tar_len,padding="post")
Spanishword2index=SpanishTokenizer.word_index
Spanishindex2word=SpanishTokenizer.index_word

In [69]:
src_vocab_size=len(Englishword2index)+1
trg_vocab_size=len(Spanishword2index)+1
print("src_vocab_size:",src_vocab_size)
print("tar_vocab_size:",trg_vocab_size)

src_vocab_size: 1392
tar_vocab_size: 3106


In [70]:
print("max_inp_len:",max_inp_len)
print("max_tar_len:",max_tar_len)

max_inp_len: 5
max_tar_len: 10


In [71]:
src_sequences.shape

(5000, 5)

In [72]:
tar_sequences.shape

(5000, 10)