In [133]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import AdditiveAttention
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, Attention, concatenate
from nltk.translate.bleu_score import sentence_bleu

In [134]:
data = [
    ("Hello", "Muraho"),
    ("mwiriwe", "Good evening"),
    ("How are you?", "Mumeze mute?"),
    ("Good morning", "Mwaramutse"),
    ("Thank you", "Murakoze"),
    ("Goodbye", "Murabeho"),
]

# Load and preprocess data
def preprocess_data(data):
    english_sentences, kinyarwanda_sentences = zip(*data)

    tokenizer_eng = Tokenizer()
    tokenizer_kin = Tokenizer()
    tokenizer_eng.fit_on_texts(english_sentences)
    tokenizer_kin.fit_on_texts(kinyarwanda_sentences)

    english_sequences = tokenizer_eng.texts_to_sequences(english_sentences)
    kinyarwanda_sequences = tokenizer_kin.texts_to_sequences(kinyarwanda_sentences)

    max_eng_len = max(len(seq) for seq in english_sequences)
    max_kin_len = max(len(seq) for seq in kinyarwanda_sequences)

    english_padded = pad_sequences(english_sequences, maxlen=max_eng_len, padding='post')
    kinyarwanda_padded = pad_sequences(kinyarwanda_sequences, maxlen=max_kin_len, padding='post')

    return (english_padded, kinyarwanda_padded,
            tokenizer_eng, tokenizer_kin,
            max_eng_len, max_kin_len)

In [135]:
def build_model(eng_vocab_size, kin_vocab_size, max_eng_len, max_kin_len):
    # Encoder
    encoder_inputs = Input(shape=(max_eng_len,))
    encoder_embedding = Embedding(input_dim=eng_vocab_size, output_dim=256)(encoder_inputs)
    encoder_lstm = LSTM(256, return_sequences=True, return_state=True)
    encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
    encoder_states = [state_h, state_c]

    # Decoder
    decoder_inputs = Input(shape=(max_kin_len,))
    decoder_embedding = Embedding(input_dim=kin_vocab_size, output_dim=256)(decoder_inputs)
    decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

    # Attention mechanism
    attention = AdditiveAttention()
    context_vector = attention([decoder_outputs, encoder_outputs])
    decoder_combined_context = concatenate([context_vector, decoder_outputs])

    # Output layer
    output = Dense(kin_vocab_size, activation='softmax')(decoder_combined_context)

    model = Model([encoder_inputs, decoder_inputs], output)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

In [136]:
def sequence_to_text(sequence, tokenizer):
    reverse_word_map = dict(map(reversed, tokenizer.word_index.items()))
    return ' '.join([reverse_word_map.get(i, '') for i in sequence if i != 0])

In [137]:
def create_inference_models(model):
    # Encoder model
    encoder_inputs = model.input[0]  
    encoder_outputs, state_h_enc, state_c_enc = model.layers[4].output

    encoder_model = Model(encoder_inputs, [encoder_outputs, state_h_enc, state_c_enc])

    # Decoder model
    decoder_inputs = model.input[1] 
    decoder_state_input_h = Input(shape=(256,), name="decoder_state_input_h")
    decoder_state_input_c = Input(shape=(256,), name="decoder_state_input_c")  
    encoder_output_input = Input(shape=(None, 256), name="encoder_output_input")

    # Embedding layer shared with the trained model
    decoder_embedding = model.layers[3](decoder_inputs)  # reuse embedding layer

    # Reuse the LSTM layer from the original model
    decoder_lstm = model.layers[5]
    decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
        decoder_embedding, initial_state=[decoder_state_input_h, decoder_state_input_c]
    )

    # Attention mechanism
    attention = model.layers[6]
    context_vector = attention([decoder_outputs, encoder_output_input])

    # Concatenate context vector with decoder outputs
    decoder_combined_context = concatenate([context_vector, decoder_outputs])

    # Dense layer (softmax prediction)
    decoder_dense = model.layers[-1]
    decoder_outputs = decoder_dense(decoder_combined_context)

    # Decoder inference model
    decoder_model = Model(
        [decoder_inputs, decoder_state_input_h, decoder_state_input_c, encoder_output_input],
        [decoder_outputs, state_h_dec, state_c_dec]
    )

    return encoder_model, decoder_model

In [138]:
def translate_sentence(input_sentence, encoder_model, decoder_model, tokenizer_eng, tokenizer_kin, max_eng_len, max_kin_len):
    # Preprocess input sentence
    input_seq = tokenizer_eng.texts_to_sequences([input_sentence])
    input_seq = pad_sequences(input_seq, maxlen=max_eng_len, padding='post')

    # Encode the input sentence
    encoder_outputs, state_h, state_c = encoder_model.predict(input_seq)

    # Initialize the target sequence with the start token
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer_kin.word_index.get('<start>', 1) 

    translated_sentence = []
    for _ in range(max_kin_len):
        # Pass the target_seq, encoder_outputs, and decoder states into the decoder
        output_tokens, state_h, state_c = decoder_model.predict(
            [target_seq, state_h, state_c, encoder_outputs]
        )

        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_word = tokenizer_kin.index_word.get(sampled_token_index, '')

        print(f"Predicted Token Index: {sampled_token_index}, Predicted Word: {sampled_word}")

        if sampled_word == '<end>' or sampled_word == '':
            break

        # Append the predicted word to the translated sentence
        translated_sentence.append(sampled_word)

        # Update the target sequence with the predicted word index
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index

    return ' '.join(translated_sentence)


In [139]:
def evaluate_model(test_sentences, true_translations, encoder_model, decoder_model, tokenizer_eng, tokenizer_kin, max_eng_len, max_kin_len):
    predictions = [translate_sentence(sent, encoder_model, decoder_model, tokenizer_eng, tokenizer_kin, max_eng_len, max_kin_len) for sent in test_sentences]
    bleu_scores = [sentence_bleu([ref.split()], pred.split()) for ref, pred in zip(true_translations, predictions)]
    avg_bleu = np.mean(bleu_scores)
    return avg_bleu, predictions

In [140]:
def main():
    english_padded, kinyarwanda_padded, tokenizer_eng, tokenizer_kin, max_eng_len, max_kin_len = preprocess_data(data)

    eng_vocab_size = len(tokenizer_eng.word_index) + 1
    kin_vocab_size = len(tokenizer_kin.word_index) + 1

    model = build_model(eng_vocab_size, kin_vocab_size, max_eng_len, max_kin_len)

    print("Model Summary:")
    model.summary()

    # Train the model
    model.fit(
        [english_padded, kinyarwanda_padded[:, :-1]],
        kinyarwanda_padded[:, 1:], 
        batch_size=32,
        epochs=50,
        validation_split=0.2
    )

    print("\nCreating inference models...")
    # Create inference models
    encoder_model, decoder_model = create_inference_models(model)

    # Example translation
    input_sentence = "Hello"
    print("\nTranslating:")
    print("English:", input_sentence)
    print("Kinyarwanda:", translate_sentence(input_sentence, encoder_model, decoder_model, tokenizer_eng, tokenizer_kin, max_eng_len, max_kin_len))

    # Example of evaluating the model
    test_sentences = ["Hello", "How are you?"]
    true_translations = ["Muraho", "Mumeze mute?"]
    bleu_score, predictions = evaluate_model(test_sentences, true_translations, encoder_model, decoder_model, tokenizer_eng, tokenizer_kin, max_eng_len, max_kin_len)
    print("\nEvaluation:")
    print("Average BLEU score:", bleu_score)
    print("Predictions:", predictions)


In [141]:
if __name__ == "__main__":
   main()

Model Summary:


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step - accuracy: 0.2500 - loss: 2.1934 - val_accuracy: 1.0000 - val_loss: 2.1587
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step - accuracy: 1.0000 - loss: 2.1429 - val_accuracy: 1.0000 - val_loss: 2.1225
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step - accuracy: 1.0000 - loss: 2.0903 - val_accuracy: 1.0000 - val_loss: 2.0813
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - accuracy: 1.0000 - loss: 2.0330 - val_accuracy: 1.0000 - val_loss: 2.0328
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - accuracy: 1.0000 - loss: 1.9686 - val_accuracy: 1.0000 - val_loss: 1.9748
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - accuracy: 0.7500 - loss: 1.8946 - val_accuracy: 1.0000 - val_loss: 1.9049
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━