In [1]:
import collections
import numpy as np
import json
import tensorflow.strings as tf_strings
from keras.preprocessing.text import tokenizer_from_json

from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras.models import Model, Sequential, load_model
from keras.layers import Input, Dense, LSTM, Dropout, Activation, TimeDistributed, TextVectorization
from keras.optimizers import Adam
from keras.losses import sparse_categorical_crossentropy

## Verify Access to the GPU

In [2]:
# Enumerate the devices TensorFlow can see and print the raw list, so the
# presence of a GPU entry can be checked by eye in the cell output.
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 362355904869434689
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 2236245607
locality {
  bus_id: 1
  links {
  }
}
incarnation: 17655777420153255138
physical_device_desc: "device: 0, name: NVIDIA GeForce GTX 1650 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5"
xla_global_id: 416903419
]


In [3]:
## Load the trained translation model and both tokenizers

model = load_model('english_to_french_lstm_model')


def _load_tokenizer(path):
    """Rebuild a tokenizer from the JSON file at ``path``.

    The file is opened explicitly as UTF-8 instead of the platform's locale
    default, so any non-ASCII vocabulary entries decode identically on every
    operating system.

    Args:
        path: Location of the tokenizer JSON file.

    Returns:
        Whatever ``tokenizer_from_json`` builds from the file's content.
    """
    with open(path, encoding='utf-8') as f:
        # NOTE(review): the parsed JSON value is handed to tokenizer_from_json
        # unchanged; presumably the file stores the string produced by
        # Tokenizer.to_json() -- confirm against the export code.
        return tokenizer_from_json(json.load(f))


# Source-language (English) and target-language (French) tokenizers.
english_tokenizer = _load_tokenizer('english_tokenizer.json')
french_tokenizer = _load_tokenizer('french_tokenizer.json')

## Implementation of Beam Search Decoding

In [4]:
# Read the sequence length stored in 'sequence_length.json'; it is used below
# as both the padding length and the reshape width of the model input.
with open('sequence_length.json') as f:
    max_length = json.loads(f.read())

def pad(x, length=None):
    """Pad the sequences in ``x`` at the end ('post') via ``pad_sequences``.

    Args:
        x: Sequences to pad; forwarded to ``pad_sequences`` unchanged.
        length: Passed as ``maxlen``. ``None`` leaves the choice of length to
            ``pad_sequences``.

    Returns:
        The value returned by ``pad_sequences``.
    """
    # NOTE(review): only the padding side is set here; the truncation side is
    # left at the pad_sequences default -- confirm that is intended for inputs
    # longer than ``length``.
    padded = pad_sequences(x, padding='post', maxlen=length)
    return padded

def beam_search_decoder(predictions, beam_width=3, epsilon=1e-10):
    """Run beam search over a table of per-step probabilities.

    Every row of ``predictions`` is one step; every entry of a row is the
    probability of the token with that index. The same row is used for every
    surviving prefix, so a step's probabilities do not depend on the prefix.
    A hypothesis is scored by the sum of ``-log(prob)`` over its tokens, so a
    lower score is better.

    Args:
        predictions: Iterable of rows, each an iterable of probabilities.
        beam_width: How many hypotheses are kept after each step.
        epsilon: Floor applied to each probability before taking the log, so
            a zero probability does not yield an infinite score.

    Returns:
        A list of ``[token_indices, score]`` pairs ordered from lowest score
        to highest. With no rows the initial ``[[[], 0.0]]`` is returned.
    """
    beams = [[[], 0.0]]
    for step_probs in predictions:
        # Extend every surviving prefix with every token index. Iterating
        # beams first and tokens second fixes the candidate order, which the
        # stable sort below uses to break ties.
        expanded = [
            [prefix + [token_id], cost - np.log(max(p, epsilon))]
            for prefix, cost in beams
            for token_id, p in enumerate(step_probs)
        ]
        # Lowest accumulated negative log-probability first.
        expanded.sort(key=lambda item: item[1])
        beams = expanded[:beam_width]
    return beams

def translate_to_french(english_sentence, beam_width=3):
    """Translate an English sentence, print the result and return it.

    The sentence is lower-cased, stripped of '.', '?', '!' and ',', tokenized
    with ``english_tokenizer``, padded to ``max_length`` and passed to
    ``model``. The first prediction is decoded with ``beam_search_decoder``
    and the top-ranked token sequence is turned back into text with
    ``french_tokenizer``.

    Args:
        english_sentence: Text to translate.
        beam_width: Beam width forwarded to ``beam_search_decoder``.

    Returns:
        The decoded French sentence (also printed with a
        "French translation: " prefix).
    """
    # Normalise: lower-case, then drop the four punctuation marks.
    cleaned = english_sentence.lower()
    for mark in ('.', '?', '!', ','):
        cleaned = cleaned.replace(mark, '')

    # Text -> integer ids -> fixed-width batch holding this one sentence.
    token_ids = english_tokenizer.texts_to_sequences([cleaned])
    model_input = pad(token_ids, max_length).reshape((-1, max_length))

    # Keep only the first (and only) item of the predicted batch.
    step_probabilities = model.predict(model_input)[0]
    ranked_beams = beam_search_decoder(step_probabilities, beam_width)

    # Beams come back best-first; element 0 of a beam is its token sequence.
    best_sequence = ranked_beams[0][0]
    french_sentence = french_tokenizer.sequences_to_texts([best_sequence])[0]

    print("French translation: ", french_sentence)

    return french_sentence


## English to French translation

In [5]:
# Ask for a sentence interactively, then translate it. The call is left as the
# cell's last expression so the returned string is also shown as cell output.
english_sentence = input("Enter the english sentence: ")

print("The translated french sentence is:")
translate_to_french(english_sentence)

The translated french sentence is:
French translation:  quand est ce cette au


'quand est ce cette au'