**Importing required Dependencies**

In [2]:
from numpy import argmax
from pickle import load
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import model_from_json

**Loading the Cleaned and Preprocessed Text data from .pkl File**

In [3]:
# NOTE(review): unpickling can execute arbitrary code; only load this file
# when it comes from a trusted source.
# The context manager closes the file handle as soon as loading finishes;
# the previous bare open() left it open until garbage collection.
with open('../input/french-to-english/english-french-both.pkl', 'rb') as dataset_file:
    dataset = load(dataset_file)

**Creating a Tokenizer for the Sentences and finding the Phrase with Maximum Length**

In [4]:
def create_tokenizer(lines):
	"""Create a ``Tokenizer`` and fit it on the given texts.

	Args:
		lines: the texts handed unchanged to ``Tokenizer.fit_on_texts``.

	Returns:
		The fitted ``Tokenizer`` instance.
	"""
	fitted = Tokenizer()
	fitted.fit_on_texts(lines)
	return fitted

def max_length(lines):
	"""Return the highest word count found among ``lines``.

	A "word" is any token produced by calling ``split()`` with no arguments
	on an entry, i.e. a whitespace-separated chunk.

	Args:
		lines: iterable of strings.

	Returns:
		The largest number of words in any single entry.

	Raises:
		ValueError: if ``lines`` is empty (propagated from ``max``).
	"""
	word_counts = (len(text.split()) for text in lines)
	return max(word_counts)

**Determining the English & French Vocabulary Sizes and the Maximum Phrase Length for Each Language**

In [5]:
# --- English side: built from column 0 of the dataset ---
eng_tokenizer = create_tokenizer(dataset[:, 0])
eng_length = max_length(dataset[:, 0])
# one slot more than the number of distinct words; presumably this keeps
# index 0 free for padding (sequences are padded with 0 values) — TODO confirm
eng_vocab_size = 1 + len(eng_tokenizer.word_index)

print('English Vocabulary Size: %d' % eng_vocab_size)
print('Size Of Maximum Length Phrase in English Vocabulary: %d' % (eng_length))

# --- French side: built from column 1 of the dataset ---
fra_tokenizer = create_tokenizer(dataset[:, 1])
fra_length = max_length(dataset[:, 1])
fra_vocab_size = 1 + len(fra_tokenizer.word_index)

print('French Vocabulary Size: %d' % fra_vocab_size)
print('Size Of Maximum Length Phrase in French Vocabulary: %d' % (fra_length))

**Encoding Of the Sentences**

In [6]:
# Input and Output sequence must be encoded to integers and padded to the maximum phrase length
def encode_sequences(tokenizer, length, lines):
	"""Convert text lines into integer sequences padded to a fixed width.

	Args:
		tokenizer: fitted tokenizer exposing ``texts_to_sequences``.
		length: target width, forwarded to ``pad_sequences`` as ``maxlen``.
		lines: the texts to encode.

	Returns:
		Whatever ``pad_sequences`` returns for the integer-encoded lines,
		with padding appended after each sequence (``padding='post'``).
	"""
	# NOTE(review): ``truncating`` is left at the library default, so inputs
	# longer than ``length`` are cut by pad_sequences — confirm which end is
	# dropped is the one you want for user-supplied sentences.
	return pad_sequences(
		tokenizer.texts_to_sequences(lines),
		maxlen=length,
		padding='post',
	)

**Loading and Compiling the Created Model**

In [7]:
# Read the serialized model architecture. The context manager closes the
# file even if read() raises, which the manual open()/close() pair did not
# guarantee.
with open('../input/french-to-english/model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
# loading the computed weights into the New Model
model.load_weights("../input/french-to-english/model.h5")
model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['acc'])

**Defining Helpers that Decode the Loaded Model's Predictions into an English Translation**

In [8]:
# mapping integer to a word
def word_for_id(integer, tokenizer):
	"""Find the word whose index in ``tokenizer.word_index`` equals ``integer``.

	Args:
		integer: the index to look up.
		tokenizer: object with a ``word_index`` mapping of word -> index.

	Returns:
		The first matching word in the mapping's iteration order, or ``None``
		when no entry has that index.
	"""
	matches = (
		word
		for word, index in tokenizer.word_index.items()
		if index == integer
	)
	return next(matches, None)


# generating target given source sequence
def predict_sequence(model, tokenizer, source):
	"""Decode the model's prediction for ``source`` into a space-joined string.

	Args:
		model: object whose ``predict(source, verbose=0)`` returns a batch of
			per-position score vectors; only the first item of the batch is
			decoded.
		tokenizer: object with a ``word_index`` mapping of word -> index for
			the output language.
		source: encoded input, passed to ``model.predict`` unchanged.

	Returns:
		The predicted words joined by single spaces. Decoding stops at the
		first position whose highest-scoring index has no word in
		``word_index``; an empty string is returned if that is the first
		position.
	"""
	prediction = model.predict(source, verbose=0)[0]

	# Build the index -> word table once instead of scanning the whole
	# vocabulary for every output position. setdefault keeps the first word
	# seen for an index, matching a front-to-back scan of word_index.
	index_to_word = {}
	for word, index in tokenizer.word_index.items():
		index_to_word.setdefault(index, word)

	target = []
	for vector in prediction:
		word = index_to_word.get(argmax(vector))
		if word is None:
			# no word for this index (presumably the padding slot): stop here
			break
		target.append(word)

	return ' '.join(target)

**Performing Predictions on User Input**

In [9]:
def frnch_eng(text):
    """Translate a single French sentence into English.

    The sentence is encoded with the notebook-level ``fra_tokenizer`` and
    padded to ``fra_length``, then decoded by ``predict_sequence`` using the
    notebook-level ``model`` and ``eng_tokenizer``.

    Args:
        text: the French sentence to translate.

    Returns:
        The string produced by ``predict_sequence`` for the encoded sentence.
    """
    encoded = encode_sequences(fra_tokenizer, fra_length, [text])
    return predict_sequence(model, eng_tokenizer, encoded)

In [16]:
# Example: translate a sample French phrase; as the cell's last expression,
# the returned string is shown as the cell output.
frnch_eng("vous courez tres rapidement")