In [1]:
# Libraries
import numpy as np
import matplotlib.pyplot as plt
from utils.preprocessing import *
from utils.model import *
from utils.config import *
from keras.models import load_model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [2]:
# File locations: English / French text files, GloVe vectors, saved model weights
PATH_ENG = "data/small_vocab_en"
PATH_FR = "data/small_vocab_fr"
PATH_GLOVE = "data/glove.6B.100d.txt"
MODEL_SAVE_PATH = "weights/model.h5"

In [3]:
# Read the English and French data from PATH_ENG / PATH_FR.
# read_french returns two values; `french_inputs` is presumably the
# decoder-input form of the French lines — TODO confirm in utils.preprocessing.
english = read_english(PATH_ENG)
french, french_inputs = read_french(PATH_FR)

Reading English Lines
Reading French Lines


In [4]:
# Find the maximum length of an input sentence.
# NOTE(review): this takes len() of each element of `english` *before*
# tokenization. If those elements are raw strings, this is a character count,
# whereas max_len_target is computed from tokenized sequences — confirm this
# is intended (and matches what the saved weights were trained with).
max_len_input = max(len(s) for s in english)

In [5]:
# Tokenize the English lines. Returns the encoded sequences (later passed to
# `padding`) and the word -> index vocabulary (later used for the GloVe
# embedding matrix and for encoding user input).
input_sequence, word2idx_english = tokenize_english(english)

Tokenizing English Texts
Found 199 unique english tokens


In [6]:
# Tokenize the French lines. Returns the encoded target sequences, the encoded
# decoder-input sequences, and the French word -> index vocabulary.
# (Exact encoding is defined in utils.preprocessing — not visible here.)
target_sequence, target_sequence_inputs, word2idx_french = tokenize_french(french, french_inputs)

Tokenizing French Texts
Found 353 unique french tokens


In [7]:
# Longest tokenized French target sequence
max_len_target = max(len(seq) for seq in target_sequence)
# French vocabulary size plus one (presumably the extra slot is index 0 / padding — TODO confirm)
num_words_output = 1 + len(word2idx_french)

In [8]:
# Pad the encoder inputs, decoder inputs and decoder targets, passing the
# maximum input / target lengths computed earlier.
encoder_inputs, decoder_inputs, decoder_targets = padding(
    input_sequence,
    target_sequence,
    target_sequence_inputs,
    max_len_input,
    max_len_target,
)

Padding..


In [9]:
# Load the pre-trained GloVe vectors from PATH_GLOVE and build the embedding
# matrix for the English vocabulary. `word2vec` is presumably the raw
# word -> vector mapping — TODO confirm in the helper.
word2vec, embedding_matrix = glove_embedding(word2idx_english, PATH_GLOVE)

Loading GloVe word embedding
Found 400000 word vectors
Filling pre-trained embeddings...


In [10]:
# Encoder vocabulary size: English vocabulary size plus one, capped at
# MAX_NUM_WORDS (not defined in this notebook; it comes from one of the star imports).
num_words = min(MAX_NUM_WORDS, len(word2idx_english) + 1)

In [11]:
# Instantiate the project's `model` class with the vocabulary sizes, the GloVe
# embedding matrix and the maximum input / target sequence lengths.
x = model(
    num_words,
    embedding_matrix,
    max_len_input,
    max_len_target,
    num_words_output,
)

In [12]:
# Build the sequence-to-sequence model from the wrapper object; the result is
# the model whose weights are loaded and which is compiled in the next cells.
train_model = x.Seq2SeqModel()

In [13]:
# Load previously saved weights from MODEL_SAVE_PATH into the model
# (no training happens in this notebook).
train_model.load_weights(MODEL_SAVE_PATH)

In [14]:
# Configure optimizer, loss and reported metric on the restored model
train_model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [15]:
# Reverse lookups (index -> word) for the English and French vocabularies
idx2word_eng = {index: token for token, index in word2idx_english.items()}
idx2word_trans = {index: token for token, index in word2idx_french.items()}

In [16]:
# Build the inference-time model from the wrapper object; it is handed to
# x.decode_sequence in the interactive loop below.
prediction_model = x.prediction()

In [17]:
def _encode_sentence(sentence, word2idx):
    """Convert a whitespace-separated sentence into a list of vocabulary ids.

    Each word is looked up verbatim first and, if absent, in lower case.
    Words that are not in ``word2idx`` are left out of the encoded sequence
    rather than being assigned made-up ids past the end of the vocabulary,
    which would be invalid indices for the embedding lookup.

    Args:
        sentence: raw text typed by the user.
        word2idx: mapping from word to integer id.

    Returns:
        (ids, unknown): ids of the recognised words in their original order,
        and the list of words that were skipped.
    """
    ids, unknown = [], []
    for word in sentence.split():
        idx = word2idx.get(word)
        if idx is None:
            # Fall back to the lower-cased form so "I" can match "i".
            idx = word2idx.get(word.lower())
        if idx is None:
            unknown.append(word)
        else:
            ids.append(idx)
    return ids, unknown


# Interactive loop: read an English sentence, encode it with the training
# vocabulary, decode a French prediction, and repeat until the user answers "n".
while True:
    input_seq = input("Enter String ")
    token_ids, unknown_words = _encode_sentence(input_seq, word2idx_english)
    if unknown_words:
        print('Ignoring words not in the English vocabulary:', ', '.join(unknown_words))

    if token_ids:
        final_input_seq = pad_sequences([token_ids], maxlen=max_len_input)
        translation = x.decode_sequence(final_input_seq, word2idx_french, prediction_model, idx2word_trans)
        print('-')
        print('Input sentence:', input_seq)
        print('Predicted translation:', translation)
    else:
        # Nothing recognisable was entered; decoding an all-padding input is meaningless.
        print('Nothing to translate: no known English words in the input.')

    ans = input("Continue? [Y/n]")
    if ans and ans.lower().startswith('n'):
        break

Enter String i am driving
-
Input sentence: i am driving
Predicted translation: je , aimã© , mais mon .
Continue? [Y/n]n
