In [124]:
import os
from keras.models import load_model
from pickle import load
from nltk.tokenize import word_tokenize
from keras.preprocessing.sequence import pad_sequences
import numpy as np

model_and_weights = os.path.join("../", "saved_models", "model_and_weights.hdf5")
model = load_model(model_and_weights)

for_server = load(open('../saved_models/for_server.pkl', 'rb'))
tokenizer, word_index, sequence_max_len = for_server[0], for_server[1], for_server[2]

In [125]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [132]:
test_text = "What is the deal with"

punctuations_filter='!"#$%&\'()*+,-/:;<=>@[\\]^_`{|}~'

def remove_punctuations(sentence):
    return sentence.translate(str.maketrans('', '', punctuations_filter))

def remove_dots(sentence):
    return sentence.replace('...', ' ')

def remove_nonascii(word):
    return ''.join([char if ord(char) < 128 else '' for char in word])

def make_lower(word):
    return word.lower()

def clean_word(word):
    processed = remove_nonascii(word)
    processed = make_lower(processed)
    return processed

print('Test text:', test_text)
temperature = 1.3

outputs = [test_text]
for _ in range(50):
    test_text = remove_punctuations(test_text)
    clean_text = []
    for word in word_tokenize(test_text):
        clean_text.append(clean_word(word))
        
    text_sequences = tokenizer.texts_to_sequences(clean_text)
    text_padded = pad_sequences(text_sequences, maxlen=sequence_max_len, truncating='pre')

    preds = model.predict(text_padded, verbose=0)[0]
    yhat = sample(preds, temperature)
    
    # map predicted word index to word
    out_word = ''
    EOS = ".?"
    for word, index in word_index.items():
        if index == yhat:
            out_word = word
            break
        
    if out_word in EOS:
        break
                
    outputs.append(out_word)
    test_text += " " + out_word
    
outputs = " ".join(outputs)
outputs += "."
print(outputs)

Test text: What is the deal with
What is the deal with crashed fumble health kicking forces gail thin normally design corked soda matters dad hookup 12 affectionate dondi pregnant sendrax towing fiftysixth uromycitisis released which issue fuckbleepeding jill indistinct aids deny badmouthed source era sr. service interference emergency ruler lunches loan hangin payment industrious think discount limitations overlook bucks to sorts.


In [133]:
import spacy, re

def improve_output(text):
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)

    #Tag: The detailed part-of-speech tag. NN: common noun, NNS: plural, NNP: proper noun
    tagged_sentence = [(w.text, w.tag_) for w in doc]
    normalized_sent = [w.capitalize() if t in ["NNP", "NNPS"] else w for (w, t) in tagged_sentence]
    normalized_sent[0] = normalized_sent[0].capitalize()
    return re.sub(" (?=[\.,'!?:;])", "", ' '.join(normalized_sent))

print(improve_output(outputs))

What is the deal with crashed fumble health kicking forces gail thin normally design corked soda matters dad hookup 12 affectionate dondi pregnant sendrax towing fiftysixth uromycitisis released which issue fuckbleepeding jill indistinct aids deny badmouthed source era sr. service interference emergency ruler lunches loan hangin payment industrious think discount limitations overlook bucks to sorts.
