In [1]:
# Import TensorFlow under its full name and also bind the conventional `tf` alias to it.
import tensorflow
tf = tensorflow
# Print the installed TensorFlow version so the recorded output documents the environment.
print(tensorflow.__version__)

2.1.0


In [2]:
import numpy as np

In [3]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
#Possible fix to https://github.com/tensorflow/tensorflow/issues/24496
# Workaround: open a TF1-compat InteractiveSession whose GPU options have
# `allow_growth` switched on.
# NOTE(review): allow_growth presumably makes TensorFlow claim GPU memory on demand
# instead of reserving it all up front -- confirm against the ConfigProto docs.
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
# `session` is not referenced again in the visible cells; presumably it is created
# only for its side effect of installing the configured session.
session = InteractiveSession(config=config)

In [5]:
## Restore the tokenizer
# The tokenizer (word <-> index vocabulary used by the cells below) is read back
# from a pickle file given by a relative path.
# Caution: unpickling can execute code embedded in the file; only load pickles
# that come from a trusted source.
import pickle

with open('lotr_tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Show the restored object as a quick sanity check.
print(tokenizer)


<keras_preprocessing.text.Tokenizer object at 0x000001F363BA3EC8>


In [6]:
# Names of the saved models that can be loaded by this notebook.
# NOTE(review): the numeric suffix looks like a Unix timestamp -- confirm.
model_options = [
    'lotr_prediction_model_1580426580',
    'lotr_prediction_model_1580492244',
]
# Select which saved model to use (here: the second entry).
MODEL_NAME = model_options[1]
model = tf.keras.models.load_model(MODEL_NAME)
# Second entry of the model's input shape: the sequence length every input is padded to.
PREDICTION_LEN = model.get_input_shape_at(0)[1] # i.e. 29 in the recorded run (the next cell shows the shape (None, 29))

In [7]:
# Display the model's input shape; PREDICTION_LEN was taken from index 1 of this tuple.
model.get_input_shape_at(0)

(None, 29)

In [8]:
# Greedy text generation written out step by step: repeatedly feed the text so
# far to the model and append the single most likely next word.
seed_text = "Help me Obi Wan Kenobi, you're my only hope"
next_words = 30

for _ in range(next_words):
    # Re-tokenize the whole text and left-pad it to the length the model expects.
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=PREDICTION_LEN, padding='pre')
    # argmax over the predicted distribution is the documented replacement for
    # Sequential.predict_classes, which newer TensorFlow releases no longer provide.
    predicted = np.argmax(model.predict(token_list, verbose=0), axis=-1)
    # Direct reverse lookup instead of scanning word_index on every step; an index
    # without a word (e.g. the padding index 0) yields "" exactly as the scan did.
    output_word = tokenizer.index_word.get(int(predicted[0]), "")
    seed_text += " " + output_word
print(seed_text)

Help me Obi Wan Kenobi, you're my only hope and for the east of the south there are change on the far end of the may ’ · ‘ did i hear out of anything the old words and


In [9]:
#token_list = tokenizer.texts_to_sequences(["YOUR TEXT HERE"])[0]
#token_list = pad_sequences([token_list], maxlen=PREDICTION_LEN, padding='pre')

def predict_word_prob(input_seqs):
    """Rank every vocabulary word by predicted probability for each input sequence.

    Args:
        input_seqs: batch of padded token sequences accepted by ``model``
            (for example the array returned by ``prepare_tokens``).

    Returns:
        A list with one entry per input sequence. Each entry is a list of
        ``(probability, word, index)`` tuples, one per item of
        ``tokenizer.word_index``, sorted in descending order (tuple ordering,
        so ties on probability are broken by word and then index).
    """
    # Model.predict returns the same array that Sequential.predict_proba did;
    # predict_proba was only a thin alias and is gone from newer TensorFlow releases.
    # Assumes the output is indexable as [sequence][token index] -- the indexing
    # below relies on that.
    pr_probs = model.predict(input_seqs)
    vocabulary = tokenizer.word_index.items()
    return [
        sorted(
            ((sentence_probs[index], word, index) for word, index in vocabulary),
            reverse=True,
        )
        for sentence_probs in pr_probs
    ]

def prepare_tokens(token_list):
    """Turn one token sequence into a one-row batch padded for the model.

    The sequence is wrapped in a list and handed to ``pad_sequences`` with
    ``maxlen=PREDICTION_LEN`` and ``padding='pre'``; that call's result is returned.
    """
    single_item_batch = [token_list]
    return pad_sequences(single_item_batch, maxlen=PREDICTION_LEN, padding='pre')

def prepare_sentence(sentence_text):
    """Convert a single sentence into its token sequence using the loaded tokenizer."""
    # texts_to_sequences works on a batch of texts, so wrap the sentence and unwrap the result.
    sequences = tokenizer.texts_to_sequences([sentence_text])
    return sequences[0]

def predict_for_sentence(sentence_text):
    """Run the full pipeline for one sentence: tokenize, pad, then rank next words.

    Returns whatever ``predict_word_prob`` returns for the one-row batch built
    from ``sentence_text``.
    """
    return predict_word_prob(prepare_tokens(prepare_sentence(sentence_text)))

def generate_next_n_words(seed_text, next_words=30):
    """Greedily extend ``seed_text`` by ``next_words`` words.

    On every step the text generated so far is passed to ``predict_for_sentence``
    and the word of the first (highest-ranked) suggestion is appended, separated
    by a single space. The extended text is returned.
    """
    text = seed_text
    for _ in range(next_words):
        ranked = predict_for_sentence(text)[0]
        _probability, best_word, _index = ranked[0]
        text = ' '.join((text, best_word))
    return text


In [10]:
# Show the ten highest-ranked (probability, word, index) suggestions for the single input sentence.
predict_for_sentence("Frodo was")[0][:10]

[(0.48045704, 'restless', 2214),
 (0.1506405, 'in', 8),
 (0.03557991, 'feeling', 596),
 (0.03426382, 'not', 19),
 (0.022358749, 'roused', 2114),
 (0.016590374, 'sitting', 485),
 (0.014867146, 'afraid', 415),
 (0.012758062, 'content', 1123),
 (0.012047637, 'no', 45),
 (0.0120195905, 'chilled', 3080)]

In [11]:
seed_text = "Frodo was"
next_words = 30

import random  # not used by this cell; kept in case other cells rely on it being imported

# Greedy decoding is already implemented by generate_next_n_words, so call it
# instead of repeating its loop here. This also keeps `next_words` an int
# (the copied loop rebound it to the list of suggestions).
gen_text = generate_next_n_words(seed_text, next_words)

gen_text

'Frodo was restless and anxious listening in vain for a sound or until he thought he thought the thought again tale it was felt again and he had never seen such as'

In [12]:
import random
# Create a generator with a fixed seed and look at the first value it yields.
random_seeder = random.Random(42)
random_seeder.random()

0.6394267984578837

In [13]:
seed_text = "Frodo was"
next_words = 30

rnd_temp = 4 # How 'safe' to keep the suggestions.
# Each step draws u uniformly from [0, 1) and walks down the ranked suggestions
# until their cumulative probability reaches u ** rnd_temp.
# A high value shrinks that threshold, so the top suggestions are taken more frequently.
# Lower values allow more occasional 'direction changes'.
# Best set in [1, inf).

gen_text = seed_text
import random
random_seeder = random.Random(42)
for _ in range(next_words):
    suggestions = predict_for_sentence(gen_text)[0]
    rd = random_seeder.random() ** rnd_temp
    for pr, word, token_num in suggestions:
        rd -= pr
        if rd <= 0:
            selected = word
            break
    else:
        # The listed probabilities did not cover the draw (e.g. float rounding, or
        # probability mass on indices that have no word). Fall back to the most
        # likely word rather than silently taking the least likely one.
        selected = suggestions[0][1]

    gen_text += ' ' + selected

gen_text

'Frodo was restless and anxious listening in vain for a sound or until he thought he thought the thought again felt but he felt the heart that he had had after many'

In [14]:
# seed_text = "Frodo was"
# NOTE: with the line above commented out, this cell reuses `seed_text` from an earlier cell.
next_words = 30
num_options = 5

gen_text = seed_text
import random
random_seeder = random.Random(42)  # not used by this cell; kept so later cells see the same state
for _ in range(next_words):
    options = predict_for_sentence(gen_text)[0][:num_options]
    # Show the tail of the text so far, then the numbered candidate words.
    print(gen_text[-30:])
    print(', '.join(
        '%d: "%s"' % (number, option[1])
        for number, option in enumerate(options, start=1)
    ))
    # Keep asking until the answer is usable, so a typo does not abort the
    # session and 0 / negative numbers cannot silently index from the end.
    while True:
        user_choice = input().strip()
        if user_choice == '.':
            # '.' inserts the '·' token (the separator the later cell splits sentences on).
            selected = '·'
            break
        try:
            selected_num = int(user_choice)
        except ValueError:
            selected_num = 0
        if 1 <= selected_num <= len(options):
            selected = options[selected_num - 1][1]
            break
        print('Enter a number from 1 to %d, or "." to insert a sentence break.' % len(options))

    gen_text += ' ' + selected

gen_text

Frodo was
1: "restless", 2: "in", 3: "feeling", 4: "not", 5: "roused"
3
Frodo was feeling
1: "steadily", 2: "quite", 3: "rather", 4: "quickly", 5: "strong"
1
Frodo was feeling steadily
1: "quite", 2: "in", 3: "thought", 4: "out", 5: "quickly"
1
odo was feeling steadily quite
1: "quite", 2: "the", 3: "well", 4: "maybe", 5: "only"
3
as feeling steadily quite well
1: "in", 2: "for", 3: "·", 4: "sir", 5: "but"
3
 feeling steadily quite well ·
1: "he", 2: "‘", 3: "frodo", 4: "the", 5: "pippin"
5
g steadily quite well · pippin
1: "laughed", 2: "was", 3: "remained", 4: "soon", 5: "suddenly"
1
ly quite well · pippin laughed
1: "and", 2: "at", 3: "·", 4: "others", 5: "suddenly"
2
quite well · pippin laughed at
1: "the", 2: "hand", 3: "me", 4: "them", 5: "a"
1
e well · pippin laughed at the
1: "pony", 2: "answer", 3: "foot", 4: "opening", 5: "brown"
1
l · pippin laughed at the pony
1: "and", 2: "for", 3: "additions", 4: "‘", 5: "looking"
2
pippin laughed at the pony for
1: "a", 2: "he", 3: "they

'Frodo was feeling steadily quite well · pippin laughed at the pony for he had forgotten the old hobbit · frodo found his spirits were stronger but he felt he not forgotten'

In [15]:
# Check which token ids a sentence containing out-of-vocabulary words is mapped to.
prepare_sentence("Silmarillion is OOV")

[1, 22, 1]

In [16]:
# Map the token ids back to text to see how the tokenizer represents the unknown words.
tokenizer.sequences_to_texts([prepare_sentence("Silmarillion is OOV")])[0]

'<UNKNOWN> is <UNKNOWN>'

In [17]:
# Top ten next-word suggestions for the same sentence containing unknown words.
predict_for_sentence("Silmarillion is OOV")[0][:10]

[(0.7007815, "'", 11),
 (0.21458879, 'in', 8),
 (0.06252094, 'of', 4),
 (0.0043803714, 'now', 40),
 (0.0027238447, 'again', 64),
 (0.0027132356, 'for', 20),
 (0.002422171, 'yet', 111),
 (0.0018588509, 'and', 3),
 (0.0010601499, 'to', 5),
 (0.0008159144, 'up', 46)]

In [18]:
#%%timeit
# My %%timeit run measured: 36.1 s ± 1.17 s per loop (mean ± std. dev. of 7 runs, 1 loop each)

# Greedily generate 1000 words after the seed "Frodo"; the next cell prints the result.
gen_text = generate_next_n_words("Frodo", 1000)

In [19]:
import re
# Print one generated sentence per line: every '·' separator together with the
# whitespace that follows it is rewritten as '·' plus a newline.
print(re.sub(r'·\s+', "·\n", gen_text))

Frodo said nothing but sam ‘ how i mean to déagol ’ said the wizard looking round as quickly as if with a relief of relief ·
suddenly he was aware that he was himself brown and up the and rest gandalf behind ·
he stepped down to the pavilion ·
he was frodo in the last homely house like a answered of little feet but there was a cry of feeling going beyond simple but the biting lanterns were drinking and the river with small river and wrought eyes and age and for the kings ' ·
a moment has come out with his good dangerous he already accident ' ·
said frodo ·
the story just replied that an faithful with mr frodo ·
and quite accident i need too likely that he made that you worry about anything ·
i don't know what i mean ·
no time i would tell you a really like away i much came knowledge to say my part in that i think i shall ever get there ·
but i had forgotten it if you were present ·
‘ i am rather shaken with me ·
i know one of the little folk who we go with him ·
how it would be i i kno