In [1]:
# import keras
from keras.models import load_model
import json
from nltk.tokenize import word_tokenize
import numpy as np
import random
import sys
import pandas as pd

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# model = load_model('RNN_weights.best.hdf5')
model = load_model('my_model.h5')

In [3]:
def sample(preds, temperature=1.0):
    """
    Compute new probability distribution based on the temperature
    Higher temperature creates more randomness.
    
    :param preds: numpy array of shape (unique chars,), and elements sum to 1
    :type  preds: numpy.ndarray
    :param temperature: characterizes the entropy of probability distribution
    :type  temperature: float
    :returns: a number 0 to the length of preds - 1
    :rtype:   int
    """
    
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [4]:
def text_generate(model, text, word_indices, maxlen=10, temperature=1.0, textlen=40):
    """
    Generate text based on a model.
    
    :param model: trained keras model
    :type  model: keras.engine.sequential.Sequential
    :param text: lyrics
    :type  text: str
    :param char_indices: dictionary mapping a character to its integer placeholder
    :type  char_indices: dict
    :param maxlen: maximum length of the sequences
    :type  maxlen: int
    :param textlen: Number of characters of generated sequence
    :type  textlen: int
    """
    
    start_index = random.randint(0, len(text) - maxlen - 1) 
    generated_text = text[start_index: start_index + maxlen] 
    full_sentence = " ".join (generated_text)
    print(len(generated_text))
    print('--- Generating with seed: "' + full_sentence + '"')
    
    print('------ temperature:', temperature)
    sys.stdout.write(full_sentence)
    
    
    for i in range(textlen):
        
        sampled = []
        for t, word in enumerate(generated_text):
            word_dimensions = list (wordvectors_mini[word])
            sampled.append(word_dimensions)
        sampled = np.array(sampled)
        sampled = np.reshape(sampled, (1,) + sampled.shape ) 

        preds = model.predict(sampled, verbose=0)[0]
        next_index = sample(preds, temperature)
        next_word = word_indices[next_index]

        generated_text.append ( next_word)
        generated_text = generated_text[1:]
        sys.stdout.write(" " + next_word)


In [5]:
# tokens[:200]

def find_random_sentence(tokens, word, maxlen):
    list_of_appearance = np.where(np.array(tokens) == word)[0]
    stop_characters = set({'...', '.', '?', '!'})
    random_index = random.choice(list_of_appearance)
    index = random_index
    
    sentence = []
    while (tokens[index] not in stop_characters):
        sentence.append(tokens[index])
        index += 1
    sentence.append(tokens[index])
    
    index = random_index
    
    while ( (tokens[index] not in stop_characters) or len(sentence) < 11):
        sentence.insert(0, tokens[index])
        index -= 1
    
    return sentence[:maxlen]

In [6]:
def text_generate_with_word(
    model, 
    text, 
    word_indices,
    word,
    maxlen=10, 
    temperature=1.0,
    textlen=40):
    """
    Generate text based on a model.
    The starting seed is based on a word input 
    
    :param model: trained keras model
    :type  model: keras.engine.sequential.Sequential
    :param text: lyrics
    :type  text: str
    :param char_indices: dictionary mapping a character to its integer placeholder
    :type  char_indices: dict
    :param word: the input starting word
    :type  word: str
    :param maxlen: maximum length of the sequences
    :type  maxlen: int
    :param textlen: Number of characters of generated sequence
    :type  textlen: int
    """
    
    stop_characters = set({'...', '.', '?', '!'})
    
    generated_text = find_random_sentence(tokens, word, maxlen)
    full_sentence = " ".join (generated_text)
    print(len(generated_text))
    print('--- Generating with seed: "' + full_sentence + '"')
    
    print('------ temperature:', temperature)
    sys.stdout.write(full_sentence)
    
    out_text = generated_text
    
#     for i in range(textlen):
    stop_generate = False
    i = 0
    while ( (i < textlen) or (not stop_generate) ):
        
        sampled = []
        for t, word in enumerate(generated_text):
            word_dimensions = list (wordvectors_mini[word])
            sampled.append(word_dimensions)
        sampled = np.array(sampled)
        sampled = np.reshape(sampled, (1,) + sampled.shape ) 

        preds = model.predict(sampled, verbose=0)[0]
        next_index = sample(preds, temperature)
        next_index = str(next_index)
        next_word = word_indices[next_index]

        generated_text.append ( next_word)
        generated_text = generated_text[1:]
        sys.stdout.write(" " + next_word)
        out_text.append(next_word)
        
        if (next_word in stop_characters):
            stop_generate = True
        i += 1
    return out_text


In [17]:
import re
text = open('all.txt', 'r').read()
text = text.lower()
text = re.sub(r'[*^$%&()@#-+_=//]', ' ', text)
text = re.sub(" \d+", "number", text)
text = re.sub(r'http\S+', ' ', text)

In [18]:
text[:5000]

'did you have your showerhead plugged in? or is it a wireless model?\nyep. and when you finally switch to another career, it really throws you off when your coworkers measure years differently. \nthats a nice dark thought that will now pop into my head during my next birthday...\ni want to be cremated and then put into one of those cardboard pods that grows into a tree so one day i will be cut down and turned into someone elses coffin.\nwould they think it’s cool if it was vomit?\nu the only one thinking that bro\ni wanted to get to know somebody better, so i asked them how their vaction went in mexico - simple enough right?   she told me to follow her instagram, where i could find out for myself.  at least it saved me the effort of getting to know them in the first place?  edit: i know, she probably wasnt interested in getting to know me, but a simple it was good would have gotten the message across just as well... \nits just the outside catching up with the inside.\ni am sorry detect

In [None]:
tokens = word_tokenize(text)
with open('mapping.json') as infile:
    word_indices = json.load(infile)

In [8]:
# wordvectors_mini = json.loads('wordvectors_mini.json')#.decode("utf-8")
# wordvectors_mini = json.load('test.json')
with open('wordvectors_mini.json') as infile:
    wordvectors_mini = json.load(infile)
# text_generate_with_word(model, tokens, word_indices, 'pizza')
# test_csv = pd.read_csv('wordvectors_mini.csv')

In [9]:
# test_csv.to_dict()#.shape#.columns

In [10]:
# test_csv
# wordvectors_mini

In [11]:
text_generate_with_word(model, tokens, word_indices, 'pizza', maxlen=20)

20
--- Generating with seed: "as someone who doesn ’ t live in the usa , can someone explain what makes an old pizza pizza"
------ temperature: 1.0
as someone who doesn ’ t live in the usa , can someone explain what makes an old pizza pizza . we are saying in instead a gave for a pretty a job . just a ’ movie started and the last part . a off into the where i have a and ( with an ? term https .

['as',
 'someone',
 'who',
 'doesn',
 '’',
 't',
 'live',
 'in',
 'the',
 'usa',
 ',',
 'can',
 'someone',
 'explain',
 'what',
 'makes',
 'an',
 'old',
 'pizza',
 'pizza',
 '.',
 '.',
 'we',
 'are',
 'saying',
 'in',
 'instead',
 'a',
 'gave',
 'for',
 'a',
 'pretty',
 'a',
 'job',
 '.',
 'just',
 'a',
 '’',
 'movie',
 'started',
 'and',
 'the',
 'last',
 'part',
 '.',
 'a',
 'off',
 'into',
 'the',
 'where',
 'i',
 'have',
 'a',
 'and',
 '(',
 'with',
 'an',
 '?',
 'term',
 'https',
 '.']