In [4]:
# ============================
# Seq2Seq with Attention - English → French (Toy Demo)
# ============================

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate, Dot, Activation
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# ----------------------------
# 1. Tiny Dataset
# ----------------------------
# Parallel corpus: english_sentences[i] is paired with french_sentences[i].
english_sentences = [
    "hello",
    "how are you",
    "good morning",
    "i love you",
    "see you later",
]
french_sentences = [
    "bonjour",
    "comment ça va",
    "bonjour",
    "je t'aime",
    "à plus tard",
]

# Decoder input is the French sentence prefixed with "<start>"; the decoder
# target is the same sentence suffixed with "<end>", i.e. the input shifted
# by one token.
fr_input_sentences = [f"<start> {sentence}" for sentence in french_sentences]
fr_target_sentences = [f"{sentence} <end>" for sentence in french_sentences]

# ----------------------------
# 2. Tokenization
# ----------------------------
num_words = 1000


def _fit_tokenizer(texts):
    """Return a Tokenizer fitted on *texts*.

    filters='' disables character filtering, so the "<start>" / "<end>"
    markers are kept as whole tokens.
    """
    tokenizer = Tokenizer(num_words=num_words, filters='')
    tokenizer.fit_on_texts(texts)
    return tokenizer


# English side: one vocabulary built from the source sentences.
eng_tokenizer = _fit_tokenizer(english_sentences)
eng_word_index = eng_tokenizer.word_index
eng_sequences = eng_tokenizer.texts_to_sequences(english_sentences)

# French side: a single vocabulary fitted on both decoder inputs and targets,
# so "<start>" and "<end>" both receive an index.
fr_tokenizer = _fit_tokenizer(fr_input_sentences + fr_target_sentences)
fr_word_index = fr_tokenizer.word_index
fr_input_seq = fr_tokenizer.texts_to_sequences(fr_input_sentences)
fr_target_seq = fr_tokenizer.texts_to_sequences(fr_target_sentences)

# ----------------------------
# 3. Padding
# ----------------------------
# Longest tokenized sentence on each side fixes the padded width.
max_eng_len = max(map(len, eng_sequences))
max_fr_len = max(map(len, fr_target_seq))

# Pad at the end ('post') so real tokens come first in every row.
encoder_input_data = pad_sequences(
    eng_sequences, padding='post', maxlen=max_eng_len
)
decoder_input_data = pad_sequences(
    fr_input_seq, padding='post', maxlen=max_fr_len
)
decoder_target_data = pad_sequences(
    fr_target_seq, padding='post', maxlen=max_fr_len
)

# ----------------------------
# 4. Parameters
# ----------------------------
# Embedding width and LSTM state size shared by encoder and decoder.
embed_dim, latent_dim = 64, 128

# One extra slot on top of the word index: id 0 is the padding value written
# by pad_sequences.
eng_vocab_size = 1 + len(eng_word_index)
fr_vocab_size = 1 + len(fr_word_index)

# ----------------------------
# 5. Encoder
# ----------------------------
# Source token ids -> embeddings -> LSTM. The LSTM returns its output at
# every time step (used by attention) plus its final hidden/cell states
# (used to initialise the decoder).
encoder_inputs = Input(shape=(max_eng_len,))
enc_emb_layer = Embedding(
    input_dim=eng_vocab_size,
    output_dim=embed_dim,
    mask_zero=True,  # treat id 0 (padding) as masked
)
encoder_lstm = LSTM(
    units=latent_dim,
    return_sequences=True,
    return_state=True,
)

enc_emb = enc_emb_layer(encoder_inputs)
encoder_outputs, state_h, state_c = encoder_lstm(enc_emb)

# ----------------------------
# 6. Decoder
# ----------------------------
# Target-side token ids ("<start> ...") -> embeddings -> LSTM seeded with the
# encoder's final states. Only the per-step outputs are needed for training;
# the returned states are discarded here.
decoder_inputs = Input(shape=(max_fr_len,))
dec_emb_layer = Embedding(
    input_dim=fr_vocab_size,
    output_dim=embed_dim,
    mask_zero=True,  # treat id 0 (padding) as masked
)
decoder_lstm = LSTM(
    units=latent_dim,
    return_sequences=True,
    return_state=True,
)

dec_emb = dec_emb_layer(decoder_inputs)
decoder_outputs, _, _ = decoder_lstm(
    dec_emb,
    initial_state=[state_h, state_c],
)

# ----------------------------
# 7. Attention
# ----------------------------
# Dot-product attention of every decoder step over every encoder step.
# NOTE(review): nothing in this block excludes padded encoder positions from
# the softmax — confirm whether that is intended.

# Similarity scores: (batch, max_fr_len, max_eng_len).
score = Dot(axes=(2, 2))([decoder_outputs, encoder_outputs])

# Normalise over the last axis, i.e. across encoder positions.
attn_weights = Activation('softmax')(score)

# Weighted sum of encoder outputs: (batch, max_fr_len, latent_dim).
context = Dot(axes=(2, 1))([attn_weights, encoder_outputs])

# Each decoder step sees its context vector followed by its own LSTM output.
decoder_combined_context = Concatenate(axis=-1)(
    [context, decoder_outputs]
)

# ----------------------------
# 8. Final Dense
# ----------------------------
# Projects [context ; decoder output] at each step onto a probability
# distribution over the French vocabulary. The layer object is kept so the
# inference graph can reuse the trained weights.
output_dense = Dense(
    units=fr_vocab_size,
    activation='softmax',
)
decoder_pred = output_dense(decoder_combined_context)

# ----------------------------
# 9. Training Model
# ----------------------------
# Training graph: takes the padded English ids and the "<start>"-prefixed
# French ids, and predicts the "<end>"-suffixed French ids at every step.
model = Model([encoder_inputs, decoder_inputs], decoder_pred)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

# Train for many epochs on the tiny dataset so the model memorises it
# (demo only).
model.fit(
    [encoder_input_data, decoder_input_data],
    # Integer class ids with a trailing axis: (samples, max_fr_len, 1).
    np.expand_dims(decoder_target_data, -1),
    batch_size=2,
    epochs=300,
    verbose=0
)

# ----------------------------
# 10. Inference Models
# ----------------------------
# Encoder inference: source token ids -> per-step encoder outputs plus the
# final LSTM states used to initialise the decoder.
encoder_model = Model(encoder_inputs, [encoder_outputs, state_h, state_c])

# Decoder inference: re-wires the trained decoder layers so that the LSTM
# states and the encoder outputs are fed in from outside, which lets the
# caller decode token by token and carry the states between calls.
dec_state_input_h = Input(shape=(latent_dim,))
dec_state_input_c = Input(shape=(latent_dim,))
enc_out_input     = Input(shape=(max_eng_len, latent_dim))

# Dedicated variable-length token input for inference. The training-time
# `decoder_inputs` is declared with shape (max_fr_len,), but step-by-step
# decoding feeds a single token per call; reusing it makes Keras report an
# input-shape mismatch (and stricter versions may reject the call).
dec_step_inputs = Input(shape=(None,))

# Same layer objects as the training graph, so the trained weights are shared.
dec_emb2 = dec_emb_layer(dec_step_inputs)
dec_out2, dec_h2, dec_c2 = decoder_lstm(
    dec_emb2, initial_state=[dec_state_input_h, dec_state_input_c]
)

# Attention over the supplied encoder outputs, mirroring the training graph.
score2 = Dot(axes=[2, 2])([dec_out2, enc_out_input])
attn_weights2 = Activation('softmax')(score2)
context2 = Dot(axes=[2, 1])([attn_weights2, enc_out_input])
dec_combined2 = Concatenate(axis=-1)([context2, dec_out2])

dec_pred2 = output_dense(dec_combined2)

# Inputs (in order): token ids, encoder outputs, hidden state, cell state.
# Outputs (in order): vocabulary probabilities, new hidden state, new cell state.
decoder_model = Model(
    [dec_step_inputs, enc_out_input, dec_state_input_h, dec_state_input_c],
    [dec_pred2, dec_h2, dec_c2]
)

# ----------------------------
# 11. Translation Function
# ----------------------------
# Inverse of the French vocabulary: token id -> word.
reverse_fr_index = {idx: token for token, idx in fr_word_index.items()}


def translate_sentence(input_text):
    """Greedily decode a French translation for *input_text*.

    The text is tokenized and padded like the training data, encoded once,
    and then decoded one token at a time starting from "<start>". Decoding
    stops at "<end>", at an id with no vocabulary entry, or after
    ``max_fr_len`` steps. Returns the decoded words joined by single spaces.
    """
    encoded = pad_sequences(
        eng_tokenizer.texts_to_sequences([input_text]),
        maxlen=max_eng_len,
        padding='post',
    )
    enc_out, h, c = encoder_model.predict(encoded, verbose=0)

    next_id = fr_word_index['<start>']
    words = []
    for _ in range(max_fr_len):
        # Feed only the most recent token; h and c carry the history.
        step_input = np.array([[next_id]], dtype=np.float64)
        preds, h, c = decoder_model.predict(
            [step_input, enc_out, h, c], verbose=0
        )

        # Greedy choice: most probable token at the last time step.
        next_id = np.argmax(preds[0, -1, :])
        token = reverse_fr_index.get(next_id, '')
        if token in ('', '<end>'):
            break
        words.append(token)

    return " ".join(words)

# ----------------------------
# 12. Test
# ----------------------------
# Smoke test: translate sentences taken from the training set and print them.
for phrase in ("hello", "i love you", "good morning", "see you later"):
    print(f"English: {phrase} → French:", translate_sentence(phrase))


None
English: hello → French: bonjour
English: i love you → French: je t'aime
English: good morning → French: bonjour
English: see you later → French: à plus tard
