<a href="https://colab.research.google.com/github/Aswin-Cheerngodan/RNN/blob/main/Seq2Seq_Attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [21]:
def data_preprocessor(source_sentences,target_sentences):
    source_tokenizer = Tokenizer()
    source_tokenizer.fit_on_texts(source_sentences)
    source_sequences = source_tokenizer.texts_to_sequences(source_sentences)
    source_padded  = pad_sequences(source_sequences,padding='post')

    target_sentences = ["start " + sentence + " end" for sentence in target_sentences]

    target_tokenizer = Tokenizer()
    target_tokenizer.fit_on_texts(target_sentences)
    target_sequences = target_tokenizer.texts_to_sequences(target_sentences)
    target_padded = pad_sequences(target_sequences,padding='post')

    return source_padded,target_padded,source_tokenizer,target_tokenizer


english_sentences = ['hello', 'world', 'how are you', 'I am fine', 'have a good day']
spanish_sentences = ['hola', 'mundo', 'cómo estás', 'estoy bien', 'ten un buen día']
input_texs,target_texts,source_tokenizer,target_tokenizer= data_preprocessor(
    english_sentences,
    spanish_sentences
)


In [22]:
from tensorflow.keras.layers import Input,LSTM,Dense,Embedding,Concatenate
from tensorflow.keras.layers import AdditiveAttention as Attention
from tensorflow.keras.models import Model

In [23]:
embedding_dim = 256
latent_dim = 512
num_encoder_tokens = len(source_tokenizer.word_index)+1
num_decoder_tokens = len(target_tokenizer.word_index)+1

#Encoder
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(num_encoder_tokens,embedding_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim,return_state=True)
encoder_outputs, state_h,state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h,state_c]

#Decoder
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(num_decoder_tokens,embedding_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim,return_sequences=True,return_state=True)
decoder_outputs, _,_ = decoder_lstm(decoder_embedding,initial_state=encoder_states)

#Attention layer
attention = Attention()
attention_output = attention([decoder_outputs,encoder_outputs])

decoder_concat_input = Concatenate(axis=-1)([decoder_outputs,attention_output])

#Dense Layer
decoder_dense = Dense(num_decoder_tokens,activation='softmax')
decoder_outputs = decoder_dense(decoder_concat_input)

#MODEL
model = Model([encoder_inputs,decoder_inputs],decoder_outputs)
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [24]:
from tensorflow.keras.utils import to_categorical
decoder_target_data = to_categorical(target_texts,num_decoder_tokens)
model.fit([input_texs,target_texts],decoder_target_data,batch_size=64,epochs=50,
          validation_split=0.2)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step - accuracy: 0.0833 - loss: 2.5606 - val_accuracy: 0.0000e+00 - val_loss: 2.5653
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 657ms/step - accuracy: 0.4167 - loss: 2.4782 - val_accuracy: 0.0000e+00 - val_loss: 2.5647
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 274ms/step - accuracy: 0.4167 - loss: 2.3818 - val_accuracy: 0.0000e+00 - val_loss: 2.5651
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 316ms/step - accuracy: 0.4167 - loss: 2.2492 - val_accuracy: 0.0000e+00 - val_loss: 2.5676
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 589ms/step - accuracy: 0.4167 - loss: 2.0552 - val_accuracy: 0.0000e+00 - val_loss: 2.5746
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 345ms/step - accuracy: 0.4167 - loss: 1.7920 - val_accuracy: 0.0000e+00 - val_loss: 2.5930
Epoch 7/50
[1m1/1

<keras.src.callbacks.history.History at 0x7d1aa59d0350>

In [25]:
# Encoder Inference Model
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder Inference Model
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_state_inputs = [decoder_state_input_h, decoder_state_input_c]

# Fix: Create a new single-word decoder input
decoder_input_single = Input(shape=(1,))
decoder_embedding_inf = Embedding(num_decoder_tokens, embedding_dim)(decoder_input_single)

decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_embedding_inf, initial_state=decoder_state_inputs
)
decoder_states = [state_h, state_c]

# Fix: Use a separate Dense layer for inference
decoder_dense_inf = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense_inf(decoder_outputs)

decoder_model = Model([decoder_input_single] + decoder_state_inputs, [decoder_outputs] + decoder_states)


In [27]:

def translate(input_text):
    # Tokenize and pad the input sequence
    input_seq = source_tokenizer.texts_to_sequences([input_text])
    input_seq = pad_sequences(input_seq, maxlen=input_texs.shape[1], padding='post')


    if len(input_seq[0]) == 0:
        return "Unknown input"

    # Get the encoder states
    states_value = encoder_model.predict(input_seq)

    # Generate an empty target sequence of length 1
    target_seq = np.zeros((1, 1))


    target_seq[0, 0] = target_tokenizer.word_index.get('start', 1)

    stop_condition = False
    decoded_sentence = ''
    max_decoder_length = target_texts.shape[1]  # Use actual target length

    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])

        # Handle out-of-vocab cases
        if sampled_token_index not in target_tokenizer.index_word:
            break

        sampled_char = target_tokenizer.index_word[sampled_token_index]
        decoded_sentence += ' ' + sampled_char

        # Exit condition
        if sampled_char == 'end' or len(decoded_sentence.split()) > max_decoder_length:
            stop_condition = True

        # Update the target sequence (length 1)
        target_seq[0, 0] = sampled_token_index

        # Update states
        states_value = [h, c]

    return decoded_sentence.replace(' end', '').strip()  # Remove 'end' token from output

# Example usage
translated_sentence = translate("how are you")
print(translated_sentence)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
mundo mundo estás estás bien bien bien
