In [112]:
# import keras
from keras.models import load_model
import json
from nltk.tokenize import word_tokenize
import numpy as np
import random
# import sys
import pandas as pd

In [2]:
# model = load_model('RNN_weights.best.hdf5')
model = load_model('my_model.h5')

In [3]:
def sample(preds, temperature=1.0):
    """
    Compute new probability distribution based on the temperature
    Higher temperature creates more randomness.

    :param preds: numpy array of shape (unique chars,), and elements sum to 1
    :type  preds: numpy.ndarray
    :param temperature: characterizes the entropy of probability distribution
    :type  temperature: float
    :returns: a number 0 to the length of preds - 1
    :rtype:   int
    """

    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [19]:
def text_generate(model, text, word_indices, maxlen=10, temperature=1.0, textlen=40):
    """
    Generate text based on a model.

    :param model: trained keras model
    :type  model: keras.engine.sequential.Sequential
    :param text: lyrics
    :type  text: str
    :param char_indices: dictionary mapping a character to its integer placeholder
    :type  char_indices: dict
    :param maxlen: maximum length of the sequences
    :type  maxlen: int
    :param textlen: Number of characters of generated sequence
    :type  textlen: int
    """

    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    full_sentence = " ".join(generated_text)

    out_text = generated_text

    for i in range(textlen):

        sampled = []
        for t, word in enumerate(generated_text):
            word_dimensions = list(wordvectors_mini[word])
            sampled.append(word_dimensions)
        sampled = np.array(sampled)
        sampled = np.reshape(sampled, (1,) + sampled.shape)

        preds = model.predict(sampled, verbose=0)[0]
        next_index = sample(preds, temperature)
        next_index = str(next_index)
        next_word = word_indices[next_index]

        generated_text.append(next_word)
        generated_text = generated_text[1:]
        out_text.append(next_word)
    return out_text

In [5]:
# tokens[:200]


def find_random_sentence(tokens, word, maxlen):
    list_of_appearance = np.where(np.array(tokens) == word)[0]
    stop_characters = set({'...', '.', '?', '!'})
    random_index = random.choice(list_of_appearance)
    index = random_index

    sentence = []
    while (tokens[index] not in stop_characters):
        sentence.append(tokens[index])
        index += 1
    sentence.append(tokens[index])

    index = random_index

    while ((tokens[index] not in stop_characters) or len(sentence) < 11):
        sentence.insert(0, tokens[index])
        index -= 1

    return sentence[:maxlen]

In [6]:
def text_generate_with_word(
        model,
        text,
        word_indices,
        word,
        maxlen=10,
        temperature=1.0,
        textlen=40):
    """
    Generate text based on a model.
    The starting seed is based on a word input 

    :param model: trained keras model
    :type  model: keras.engine.sequential.Sequential
    :param text: lyrics
    :type  text: str
    :param char_indices: dictionary mapping a character to its integer placeholder
    :type  char_indices: dict
    :param word: the input starting word
    :type  word: str
    :param maxlen: maximum length of the sequences
    :type  maxlen: int
    :param textlen: Number of characters of generated sequence
    :type  textlen: int
    """

    stop_characters = set({'...', '.', '?', '!'})

    generated_text = find_random_sentence(tokens, word, maxlen)
    full_sentence = " ".join(generated_text)

    out_text = generated_text

#     for i in range(textlen):
    stop_generate = False
    i = 0
    while ((i < textlen) or (not stop_generate)):

        sampled = []
        for t, word in enumerate(generated_text):
            word_dimensions = list(wordvectors_mini[word])
            sampled.append(word_dimensions)
        sampled = np.array(sampled)
        sampled = np.reshape(sampled, (1,) + sampled.shape)

        preds = model.predict(sampled, verbose=0)[0]
        next_index = sample(preds, temperature)
        next_index = str(next_index)
        next_word = word_indices[next_index]

        generated_text.append(next_word)
        generated_text = generated_text[1:]
        out_text.append(next_word)

        if (next_word in stop_characters):
            stop_generate = True
        i += 1
    return out_text

In [7]:
# change this so I download the text after making this changes, so I can just upload th
# updated text
# actually, change it so 
import re
text = open('all.txt', 'r').read()
text = text.lower()
text = re.sub(r'[*^$%&()@#-+_=//]', ' ', text)
text = re.sub(" \d+", "number", text)
text = re.sub(r'http\S+', ' ', text)

In [None]:
# text[:5000]

In [8]:
tokens = word_tokenize(text)
with open('mapping.json') as infile:
    word_indices = json.load(infile)

In [116]:
# tokens
# word_indices

from keras.preprocessing.text import text_to_word_sequence
from keras.preprocessing.text import Tokenizer 
# tokens2 = Tokenizer

tokens2 = text_to_word_sequence(text)

In [117]:
tokens2

['did',
 'you',
 'have',
 'your',
 'showerhead',
 'plugged',
 'in',
 'or',
 'is',
 'it',
 'a',
 'wireless',
 'model',
 'yep',
 'and',
 'when',
 'you',
 'finally',
 'switch',
 'to',
 'another',
 'career',
 'it',
 'really',
 'throws',
 'you',
 'off',
 'when',
 'your',
 'coworkers',
 'measure',
 'years',
 'differently',
 'thats',
 'a',
 'nice',
 'dark',
 'thought',
 'that',
 'will',
 'now',
 'pop',
 'into',
 'my',
 'head',
 'during',
 'my',
 'next',
 'birthday',
 'i',
 'want',
 'to',
 'be',
 'cremated',
 'and',
 'then',
 'put',
 'into',
 'one',
 'of',
 'those',
 'cardboard',
 'pods',
 'that',
 'grows',
 'into',
 'a',
 'tree',
 'so',
 'one',
 'day',
 'i',
 'will',
 'be',
 'cut',
 'down',
 'and',
 'turned',
 'into',
 'someone',
 'elses',
 'coffin',
 'would',
 'they',
 'think',
 'it’s',
 'cool',
 'if',
 'it',
 'was',
 'vomit',
 'u',
 'the',
 'only',
 'one',
 'thinking',
 'that',
 'bro',
 'i',
 'wanted',
 'to',
 'get',
 'to',
 'know',
 'somebody',
 'better',
 'so',
 'i',
 'asked',
 'them',
 '

In [None]:
len (tokens)

In [10]:
# wordvectors_mini = json.loads('wordvectors_mini.json')#.decode("utf-8")
# wordvectors_mini = json.load('test.json')
with open('wordvectors_mini.json') as infile:
    wordvectors_mini = json.load(infile)
# text_generate_with_word(model, tokens, word_indices, 'pizza')
# test_csv = pd.read_csv('wordvectors_mini.csv')

In [None]:
# test_csv.to_dict()#.shape#.columns

In [None]:
# test_csv
# wordvectors_mini

In [145]:
def gen_text():
    
    import language_check
    tool = language_check.LanguageTool('en-US')
    
    try: 
#         sentence = text_generate_with_word(
#             model, tokens, word_indices, 'pizza', maxlen=20
#         )
        sentence = text_generate(model, tokens, word_indices, maxlen=20)
        final_text = ' '.join(sentence)
        matches = tool.check(final_text)
        final_text = language_check.correct(final_text, matches)
        return final_text
    except ValueError:
        sentence = text_generate(model, tokens, word_indices, maxlen=20)
        final_text = ' '.join (sentence)
        matches = tool.check(final_text)
        final_text = language_check.correct(final_text, matches)
        return final_text
    except KeyError:
        gen_text()
    
    

In [146]:
def write_to_file(n):
    file = open('sample_text.txt', 'a')
    for i in range(n):
        try:
            text = gen_text()
            if text != None:
                file.write(text)
                file.write('\n')
        except KeyError:
            pass
    file.close()

In [None]:
write_to_file(1000)

In [110]:
# final_text = ' '.join (text_generate(model, tokens, word_indices, maxlen=20))
final_text = gen_text()
final_text

'it was either this or no nose. ” perhaps the reason pizza pizza is so good is because its the friends friends was a think you dont between bad s ’ from the by drug yet happy , but faster like i have to kinda i just that a one bad how my dad . im they we have nothing to'

In [101]:
# wordvectors_mini['bynumber']#.keys()

In [111]:
import language_check
tool = language_check.LanguageTool('en-US')
matches = tool.check(final_text)
language_check.correct(final_text, matches)

"It was either this or no nose. ” perhaps the reason pizza is so good is because it's the friends was a think you Mont between bad s ’ from the by drug yet happy, but faster like i have to kinda i just that a one bad how my dad. Am they we have nothing to"

In [176]:
# import random

def read_random_line(directory):
    
    lines = open(directory).read().splitlines()
    myline = random.choice(lines)
    return(myline)
    
read_random_line('sample_text.txt')

'The work. Caveman comedian : so a guy and bear walk into bar. The bartender says, what ll)) now they for a goes ! * - fire 1 : explain out that ” this can look it asked putting will go in the to be and gave it. there ’ s what about that day'