In [None]:
from random import randint
from pickle import load
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

In [None]:
# load and read doc
def load_doc(filename, encoding=None):
    """Read a whole text file into memory and return its contents.

    Args:
        filename: Path of the text file to read.
        encoding: Optional text encoding passed to ``open``. The default
            ``None`` keeps the platform default, matching the previous
            behaviour of this function.

    Returns:
        The complete contents of the file as a single string.

    Raises:
        OSError: If the file cannot be opened or read (for example
            ``FileNotFoundError`` when the path does not exist).
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, 'r', encoding=encoding) as file:
        return file.read()

# Read the cleaned training sequences (one sequence per line of the file).
in_filename = 'afr3004.txt'
doc = load_doc(in_filename)
lines = doc.split('\n')
# The model input length is the token count of the first line minus one;
# presumably the final token of each line was the prediction target -- TODO confirm.
first_line_tokens = lines[0].split()
seq_length = len(first_line_tokens) - 1

In [None]:
# load the model
model = load_model('afr_model.h5')
# load the tokenizer
# NOTE: unpickling can execute arbitrary code -- only load 'tokenizer.pkl'
# when it comes from a trusted source.
# The context manager makes sure the file handle is closed after loading.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)

In [None]:
# select a seed text
# randint() includes BOTH endpoints, so the upper bound has to be
# len(lines) - 1; using len(lines) occasionally raised IndexError.
seed_text = lines[randint(0, len(lines) - 1)]
print(seed_text + '\n')

# if you'd like to use your own seed text, use the following or a variation:
# seed_text = "Pig was thinking of Ananse's welfare while Ananse was planning Pig's death. "

In [None]:
# generate a sequence from a language model
def generate_seq(model, tokenizer, seq_length, seed_text, n_words):
    """Greedily generate ``n_words`` words that continue ``seed_text``.

    At every step the running text is encoded with ``tokenizer``, padded or
    pre-truncated to ``seq_length`` tokens, and fed to ``model``; the
    highest-scoring class index is mapped back to a word and appended to the
    running text before the next step.

    Args:
        model: Object exposing ``predict(encoded, verbose=0)``. The result is
            assumed to hold one row of per-class scores for the single input
            sequence and to support ``argmax(axis=-1)`` -- TODO confirm
            against the trained model's output layer.
        tokenizer: Object exposing ``texts_to_sequences`` and a
            ``word_index`` mapping of word -> integer index.
        seq_length: Number of tokens passed to the model on each step.
        seed_text: Text used to start generation.
        n_words: Number of words to generate.

    Returns:
        The generated words joined by single spaces (the seed text is not
        included). A predicted index with no entry in ``word_index``
        contributes an empty string, as before.
    """
    # Build the index -> word lookup once instead of scanning the whole
    # vocabulary for every generated word. setdefault keeps the first word
    # seen for an index, matching the original "first match wins" scan.
    index_to_word = {}
    for word, index in tokenizer.word_index.items():
        index_to_word.setdefault(index, word)

    result = list()
    in_text = seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode the text as integer
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        # truncate sequences to a fixed length
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # predict_classes() is no longer available in current Keras releases;
        # predict() + argmax over the class axis is the replacement.
        scores = model.predict(encoded, verbose=0)
        # Convert to a plain int so the dictionary lookup is well defined.
        yhat = int(scores.argmax(axis=-1)[0])
        # map predicted word index to word
        out_word = index_to_word.get(yhat, '')
        # append to input
        in_text += ' ' + out_word
        result.append(out_word)
    return ' '.join(result)

In [None]:
# Produce 50 new words continuing the seed text and display them.
generated = generate_seq(
    model=model,
    tokenizer=tokenizer,
    seq_length=seq_length,
    seed_text=seed_text,
    n_words=50,
)
print(generated)