In [1]:
import random
import pickle

import numpy as np
import pandas as pd
from nltk.tokenize import RegexpTokenizer

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop

In [2]:
# Custom metrics
def top_5_categorical_accuracy(y_true, y_pred):
    """Top-5 categorical accuracy.

    Thin wrapper that forwards to ``keras.metrics.top_k_categorical_accuracy``
    with ``k`` fixed to 5, so the metric has a stable function name.

    Args:
        y_true: Ground-truth targets, passed through unchanged.
        y_pred: Model predictions, passed through unchanged.

    Returns:
        Whatever ``keras.metrics.top_k_categorical_accuracy`` returns for k=5.
    """
    k = 5
    return keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=k)
def top_10_categorical_accuracy(y_true, y_pred):
    """Top-10 categorical accuracy.

    Thin wrapper that forwards to ``keras.metrics.top_k_categorical_accuracy``
    with ``k`` fixed to 10, so the metric has a stable function name.

    Args:
        y_true: Ground-truth targets, passed through unchanged.
        y_pred: Model predictions, passed through unchanged.

    Returns:
        Whatever ``keras.metrics.top_k_categorical_accuracy`` returns for k=10.
    """
    k = 10
    return keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=k)
# Register the custom metric functions under their names so that Keras can
# resolve them while deserialising the saved model.
custom_objects = {'top_5_categorical_accuracy': top_5_categorical_accuracy,
                  'top_10_categorical_accuracy': top_10_categorical_accuracy}
# Start from a clean Keras state before loading the model.
keras.backend.clear_session()
# Load the model with the custom metrics in scope.
with tf.keras.utils.custom_object_scope(custom_objects):
    model = load_model('FinalModel/Model5.5.h5')

# Directory that holds the pickled metadata files loaded below.
METADATA_DIR = 'FormatedData/100000Packs/MetaData'


def _load_pickle(name):
    """Unpickle ``<METADATA_DIR>/<name>.pickle`` and return the stored object.

    SECURITY: ``pickle.load`` can execute arbitrary code while loading, so
    these files must come from a trusted source.
    """
    with open(f'{METADATA_DIR}/{name}.pickle', 'rb') as file:
        return pickle.load(file)


# context_words: used by predict_next_word as the length of the input window.
context_words = _load_pickle('context_words')
# unique_tokens: indexed by integer position to obtain a word.
unique_tokens = _load_pickle('unique_tokens')
# unique_token_index: indexed by word to obtain its integer position.
unique_token_index = _load_pickle('unique_token_index')

In [3]:
def predict_next_word(input_text, n_best):
    """Return the vocabulary indices of the ``n_best`` highest-scoring next words.

    The input is lower-cased, split on whitespace and one-hot encoded into an
    array of shape ``(1, context_words, len(unique_tokens))``; positions after
    the last word stay all-zero. If more than ``context_words`` words are
    given, only the most recent ``context_words`` are used.

    Args:
        input_text: Context as whitespace-separated words.
        n_best: Number of candidate indices to return. Values larger than the
            number of model outputs are clamped; values <= 0 yield an empty
            result.

    Returns:
        1-D integer array of indices into ``unique_tokens``. The candidates
        are NOT ordered by score (``np.argpartition`` only guarantees that
        they are the top ``n_best``).

    Raises:
        KeyError: If a word inside the context window is missing from
            ``unique_token_index``.
    """
    words = input_text.lower().split()
    # The model input has exactly `context_words` slots; keep the newest words
    # instead of failing with an IndexError on longer inputs.
    if len(words) > context_words:
        words = words[len(words) - context_words:]

    X = np.zeros((1, context_words, len(unique_tokens)))
    for position, word in enumerate(words):
        X[0, position, unique_token_index[word]] = 1

    predictions = model.predict(X, verbose=0)[0]

    # Clamp so that argpartition never receives an out-of-range k.
    n_best = min(n_best, len(predictions))
    if n_best <= 0:
        # `[-0:]` would select the whole array, so handle this case explicitly.
        return np.empty(0, dtype=np.intp)
    return np.argpartition(predictions, -n_best)[-n_best:]

In [4]:
# Ask for five candidate continuations of the prompt and show them as words.
possible = predict_next_word("Once Upon a time there was a man who was", 5)
print([unique_tokens[token_id] for token_id in possible])

['the', 'very', 'so', 'a', 'not']


In [5]:
# Splits text into runs of word characters (punctuation is dropped).
tokenizer = RegexpTokenizer(r"\w+")
def generate_text(input_text, text_length, creativity=3):
    """Extend ``input_text`` by ``text_length`` generated words.

    At every step the most recent ``context_words`` tokens of the text so far
    are passed to ``predict_next_word`` and the next word is drawn uniformly at
    random from the ``creativity`` candidates it returns. If that lookup fails
    (for example because the context contains a word that is missing from the
    vocabulary), a random entry of ``unique_tokens`` is used instead.

    Args:
        input_text: Prompt as whitespace-separated words; it is kept verbatim
            at the start of the result.
        text_length: Number of words to append.
        creativity: Number of candidates to sample the next word from.

    Returns:
        The prompt words followed by the generated words, joined by single
        spaces.
    """
    text = input_text.split()
    # Tokenise the prompt once and extend the token list as words are chosen,
    # instead of re-tokenising the whole text on every iteration.
    tokens = tokenizer.tokenize(input_text.lower())
    for _ in range(text_length):
        # Always condition on the newest tokens, whatever the prompt length.
        window = tokens[max(0, len(tokens) - context_words):]
        sub_sequence = " ".join(window)
        try:
            # try to guess the next word
            choice = unique_tokens[random.choice(predict_next_word(sub_sequence, creativity))]
        except (KeyError, IndexError):
            # KeyError: the model does not know a word in sub_sequence.
            # IndexError: no candidates to choose from.
            # Anything else (including KeyboardInterrupt) propagates.
            choice = random.choice(unique_tokens)
        text.append(choice)
        tokens.extend(tokenizer.tokenize(choice.lower()))
    return " ".join(text)

In [6]:
# Generate 100 further words, sampling each one from five candidates.
generate_text("Once Upon a time there was a man who was", text_length=100, creativity=5)

'Once Upon a time there was a man who was the same day the stranger in his mind in his room he did not perceive him but i am the only story of it the stranger is not worth that he said he would not go away in it but it stops the little boy and he did he would go in a large garret of corn and a few seconds in front and looked in his mind in a little street he was so tired and he had not seen to his own heart she thought of his life she thought of a little girl he had not to'