In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Toy parallel corpus: english_sentences[i] is paired with french_sentences[i],
# so both lists must keep the same length and ordering.
english_sentences = [
    "hello",
    "how are you?",
    "good morning",
    "what is your name?",
    "I love programming",
    "I am learning machine learning",
    "this is a great day",
    "how is the weather?",
    "I feel happy today",
    "thank you very much",
    "can you help me?",
    "I am going to the store",
    "do you want coffee?",
    "I need a break",
]

french_sentences = [
    "bonjour",
    "comment ça va?",
    "bonjour",
    "quel est ton nom?",
    "j'aime la programmation",
    "j'apprends l'apprentissage automatique",
    "c'est une belle journée",
    "comment est le temps?",
    "je me sens bien aujourd'hui",
    "merci beaucoup",
    "peux-tu m'aider?",
    "je vais au magasin",
    "veux-tu du café?",
    "j'ai besoin d'une pause",
]


# Preprocess Data

# English side: fit the vocabulary, encode every sentence as a list of word
# ids, then right-pad all sequences to the length of the longest one.
eng_tokenizer = Tokenizer()
eng_tokenizer.fit_on_texts(english_sentences)
eng_sequences = eng_tokenizer.texts_to_sequences(english_sentences)
# One slot more than the number of known words, leaving room for index 0,
# which pad_sequences uses as its default filler value.
eng_vocab_size = 1 + len(eng_tokenizer.word_index)
eng_max_len = max(map(len, eng_sequences))
eng_padded = pad_sequences(
    eng_sequences,
    maxlen=eng_max_len,
    padding='post',
)

# French side: same pipeline with its own tokenizer and vocabulary.
fr_tokenizer = Tokenizer()
fr_tokenizer.fit_on_texts(french_sentences)
fr_sequences = fr_tokenizer.texts_to_sequences(french_sentences)
fr_vocab_size = 1 + len(fr_tokenizer.word_index)
fr_max_len = max(map(len, fr_sequences))
fr_padded = pad_sequences(
    fr_sequences,
    maxlen=fr_max_len,
    padding='post',
)

# Define the NMT Model (Encoder-Decoder)

# Encoder: embed the English word ids and run them through an LSTM.
# return_state=True makes the layer also return its final hidden and cell
# states, which are handed to the decoder below.
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(
    input_dim=eng_vocab_size,
    output_dim=256,
)(encoder_inputs)
encoder_lstm = LSTM(units=256, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)

# Decoder: embed the French word ids and run an LSTM that starts from the
# encoder's final states; a softmax layer scores every French vocabulary
# entry at each timestep.
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(
    input_dim=fr_vocab_size,
    output_dim=256,
)(decoder_inputs)
decoder_lstm = LSTM(units=256, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(
    decoder_embedding,
    initial_state=[state_h, state_c],
)
decoder_dense = Dense(units=fr_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Build the Model: two inputs (English ids, French ids) -> per-step
# distribution over the French vocabulary.
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)

# Compile the Model. The sparse loss takes integer word ids as targets.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Prepare Decoder Input/Output for Training (teacher forcing)
#
# Target at step t is the French word id at position t; the trailing axis is
# added so the targets line up with the per-timestep softmax output.
decoder_output_data = np.expand_dims(fr_padded, -1)

# The decoder input at step t must be the *previous* target token, not the
# token it is asked to predict. Feeding fr_padded as both input and target
# only teaches the decoder to copy its input. Shift the targets right by one
# step and put 0 in front: index 0 then acts as the start token, matching
# translate_sentence, which starts decoding from an all-zero token and stops
# when index 0 (the post-padding after the last word) is predicted.
decoder_input_data = np.pad(fr_padded[:, :-1], ((0, 0), (1, 0)))

# Train the Model
model.fit([eng_padded, decoder_input_data], decoder_output_data, epochs=100, batch_size=2)

# Separate the encoder model to get encoder states.
# NOTE: this must be built before state_h / state_c are rebound below,
# because at this point they still refer to the encoder LSTM's final states.
encoder_model = Model(encoder_inputs, [state_h, state_c])

# Create the decoder model for prediction.
# It reuses the decoder embedding, LSTM and dense layers from the training
# graph, but takes the LSTM state as explicit inputs so decoding can proceed
# one step at a time. The state width is read from the layer itself instead of
# repeating the literal, so it cannot drift from the decoder LSTM's size.
decoder_state_input_h = Input(shape=(decoder_lstm.units,))
decoder_state_input_c = Input(shape=(decoder_lstm.units,))
decoder_state_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(decoder_embedding, initial_state=decoder_state_inputs)
decoder_outputs = decoder_dense(decoder_outputs)
# Inputs: [token ids, h, c]; outputs: [vocabulary distribution, new h, new c].
decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs, state_h, state_c])

# Translate a New Sentence
def translate_sentence(input_sentence):
    """Greedily decode a French translation for one English sentence.

    The sentence is encoded with ``encoder_model``; the decoder is then run
    one token at a time, feeding each predicted token and the updated LSTM
    states back in, for at most ``fr_max_len`` steps.

    Args:
        input_sentence: English text to translate. It is tokenized with
            ``eng_tokenizer`` and padded to ``eng_max_len``.

    Returns:
        The predicted French words joined by single spaces, with no leading
        or trailing whitespace and without the stop token. Returns an empty
        string if the very first prediction is the stop token.
    """
    # Tokenize input sentence and pad it to the length used for training.
    input_seq = eng_tokenizer.texts_to_sequences([input_sentence])
    input_seq = pad_sequences(input_seq, maxlen=eng_max_len, padding='post')

    # Get the encoder's final [h, c] states. list() guarantees the
    # concatenation with [target_seq] below works even if predict returns a
    # tuple; verbose=0 suppresses the per-call progress bar.
    states_value = list(encoder_model.predict(input_seq, verbose=0))

    # Index 0 (the padding id) is fed as the start-of-sequence input.
    target_seq = np.zeros((1, 1))

    decoded_words = []
    for _ in range(fr_max_len):
        output_tokens, state_h, state_c = decoder_model.predict(
            [target_seq] + states_value, verbose=0
        )

        # Greedy choice: most probable word at the last (only) timestep.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_word = fr_tokenizer.index_word.get(sampled_token_index, '')

        # Exit condition: check *before* appending, so that neither the
        # <end> marker nor the empty padding word leaks into the result.
        if sampled_word == '<end>' or sampled_token_index == 0:
            break

        decoded_words.append(sampled_word)

        # Update the target sequence and states for the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [state_h, state_c]

    return ' '.join(decoded_words)


Epoch 1/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 84ms/step - accuracy: 0.1915 - loss: 3.6686
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 74ms/step - accuracy: 0.4137 - loss: 3.1459
Epoch 3/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 83ms/step - accuracy: 0.4112 - loss: 2.4371
Epoch 4/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 86ms/step - accuracy: 0.2699 - loss: 2.7933
Epoch 5/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - accuracy: 0.3316 - loss: 2.4833
Epoch 6/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - accuracy: 0.3860 - loss: 2.2329
Epoch 7/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.5242 - loss: 1.8535
Epoch 8/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.3707 - loss: 2.3148
Epoch 9/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [None]:
# Test translation: translate a single English word and print the result.
input_sentence = 'Hello'
translated_sentence = translate_sentence(input_sentence)
print(f"Translated sentence: {translated_sentence}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 195ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Translated sentence:  bonjour 
