In [39]:
from __future__ import print_function

import io
import random
import re
import sys

import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.callbacks import LambdaCallback, ModelCheckpoint
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.models import Model
from keras.models import Sequential
from keras.models import load_model
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file



# Strip every occurrence of the words in delete_list from the original text
# and write the result to Clean.txt.
# Both files are opened in a single `with` statement so they are closed even
# if reading or writing raises part-way through.
# NOTE(review): the output path is relative to the current working directory,
# while the cell that reads Clean.txt uses an absolute path — confirm both
# refer to the same file.
delete_list = ['Chapter']
with open("/Users/neilwatt/Documents/BIs/PrWeb/2018Posts/August/TextGeneration/PrideAndPrejudice/Original.txt", encoding='utf-8') as fin, \
        open("Clean.txt", "w+", encoding='utf-8') as fout:
    for line in fin:
        for word in delete_list:
            line = line.replace(word, "")
        fout.write(line)



In [20]:
# Load the cleaned copy of the text (the one with "Chapter" removed).
Clean = '/Users/neilwatt/Documents/BIs/PrWeb/2018Posts/August/TextGeneration/PrideAndPrejudice/Clean.txt'
with io.open(Clean, encoding='utf-8') as f:
    raw_text = f.read()

# Lower-case everything and surround each newline with spaces.
Corpus = raw_text.lower().replace('\n', ' \n ')

# Drop stand-alone runs of digits (whole-number tokens) from the text.
number_token = re.compile(r"\b\d+\b")
text = number_token.sub("", Corpus)

# Show the first 100 characters as the cell output.
text[:100]

' \n  \n  \n   \n  \n  \n it is a truth universally acknowledged, that a single man in possession \n of a go'

In [5]:


# Running this on a GPU, as training is very computationally expensive.
config = tf.ConfigProto()
# Only allocate GPU memory based on runtime allocations: start small and let
# the allocation grow as needed.
config.gpu_options.allow_growth = True
# A ConfigProto has no effect until a session is created with it, so create
# that session and register it as the one Keras uses for the model below.
K.set_session(tf.Session(config=config))


# Character vocabulary: every distinct character in the text, in sorted order,
# plus lookup tables in both directions (char -> index, index -> char).
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Cut the text into semi-redundant sequences of maxlen characters to train
# the model against; each sequence is paired with the character that follows it.
# maxlen is fairly arbitrary here; consecutive windows start `step` characters apart.
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode inputs and targets.
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        # x[i, t, k] marks that position t of sequence i holds character k.
        x[i, t, char_indices[char]] = 1
    # y[i, k] marks the character that follows sequence i.
    y[i, char_indices[next_chars[i]]] = 1



total chars: 52
nb sequences: 243191
Vectorization...


In [4]:
# Assemble the network: a single 128-unit LSTM reading (maxlen, len(chars))
# one-hot windows, followed by a dense layer with one output per character
# and a softmax activation.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

# Train with RMSprop against categorical cross-entropy.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')


Build model...


In [6]:
def sample(preds, temperature=0.5):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    re-normalised, then a single index is drawn from the resulting
    distribution with ``np.random.multinomial``.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Scaling factor, expected to be positive. Values below 1
            sharpen the distribution, values above 1 flatten it. A value of
            exactly 0 is treated as the greedy limit and returns the index
            of the largest probability without sampling.

    Returns:
        The sampled index (as returned by ``np.argmax``).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit of the scaling as temperature -> 0; avoids a division by zero.
        return np.argmax(preds)
    # log(0) is -inf, which exp() maps straight back to probability 0. That is
    # the intended result, so the divide-by-zero warning is silenced here.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Subtracting the maximum leaves the normalised result mathematically
    # unchanged but stops exp() from underflowing every entry to 0 (and the
    # division below from producing NaN) at very low temperatures.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)




In [7]:
def on_epoch_end(epoch, logs):
    """Epoch-end hook: print text generated by the current model.

    A random window of ``maxlen`` characters is picked from ``text`` once per
    call and used as the seed for every diversity value. For each diversity,
    400 characters are sampled one at a time and written to stdout.

    Args:
        epoch: Epoch index, used only in the printed header.
        logs: Not used by this function.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed_end = seed_start + maxlen
    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)

        sentence = text[seed_start:seed_end]
        generated = '' + sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for _ in range(400):
            # One-hot encode the current window in a single indexed assignment.
            x_pred = np.zeros((1, maxlen, len(chars)))
            positions = np.arange(len(sentence))
            columns = [char_indices[symbol] for symbol in sentence]
            x_pred[0, positions, columns] = 1.

            probabilities = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(probabilities, diversity)]

            # Slide the window forward by one character.
            generated = generated + next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

# Wrap the hook so Keras invokes it at the end of every epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

In [7]:
            
# Checkpoint callback so the model can be loaded again later: the file is
# rewritten only when the monitored training loss reaches a new minimum.
filepath = "weights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', mode='min',
                             save_best_only=True, verbose=1)

# Fit the model inside a GPU device scope.
# Batch size is a particularly important hyperparameter to experiment with.
training_callbacks = [print_callback, checkpoint]
with tf.device('/gpu:0'):
    model.fit(x, y,
              epochs=60,
              batch_size=200,
              callbacks=training_callbacks,
              verbose=2)

Epoch 1/60
 - 210s - loss: 1.8589

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "beth had mentioned her 
 name to her mot"
beth had mentioned her 
 name to her mother with she was not the counde of the counde of the counded the stist of the counded to me 
 an the reading the latter a for the counded to merether with the counds a for the 
 counds of the was the reading the manter of the country of the 
 country of the counded of the last miss bennet a more the reading of the lady can her at her at a for the counds. 
  
 “the elizabeth was the was a for the 

----- diversity: 0.5
----- Generating with seed: "beth had mentioned her 
 name to her mot"
beth had mentioned her 
 name to her mother, that you make a more that her toul the 
 elizabeth had now may had were miss before in the 
 more the winding at the was a your his to her are a presing of sister sister his and a paysion of the country and i am not chied the four at the country 
 with a for

  after removing the cwd from sys.path.


o her 
 from the attemper states of the awone him from the so were to see the servant, and her dischark to think 
 her facole was than the heard on the from 
 her from her 
 for the great any of me to her heard to the acconding to deterful, and she was 
 to be and in the morning discold mean to be the presenut to her to her the morning in her to be the lang--and 
----- diversity: 1.0
----- Generating with seed: "with him, and eagerly cried out: 
  
 “o"
with him, and eagerly cried out: 
  
 “oh! what 
 she should all asswerness for the the convincting; and where such a commanger. whe shope whet she and -what known she was vert to her as elizabeth he are eoner's decontions with broper and thought she can ening eldance on his sister, as societed oriving the clon. 
 there had soon not, everything dones, jo_se; and mrs. hurst belom you, “for theire, if they could their strighthes, noir in 
----- diversity: 1.2
----- Generating with seed: "with him, and eagerly cried out: 
  
 “o"
with him,

In [42]:
def generate_output():
    """Prompt for seed text and print 400 characters generated by the model.

    The user's input is lower-cased and fitted to the model's ``maxlen``
    character window: longer input keeps only its last ``maxlen`` characters,
    shorter input is left-padded with ``'0'``. The input is echoed to stdout,
    followed by 400 characters sampled one at a time at temperature 1.0.

    Returns:
        None. All output is written to stdout.
    """
    usr_input = input("Input some sample text and the model will attempt to complete it in Jane Austen style. Your input is: ")
    # The input array has exactly maxlen positions, so a longer seed would
    # index past its end; keep only the most recent maxlen characters.
    # Lower-casing happens before trimming so the window length is exact.
    sentence = usr_input.lower()[-maxlen:].rjust(maxlen, '0')

    sys.stdout.write("\n\nHere is your poem: \n\n")
    sys.stdout.write(usr_input)
    for _ in range(400):
        x_pred = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            # Pad characters and characters the model never saw during
            # training are left as all-zero vectors instead of raising KeyError.
            if char != '0' and char in char_indices:
                x_pred[0, t, char_indices[char]] = 1.

        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, temperature=1.0)
        next_char = indices_char[next_index]

        # Slide the window forward by one character.
        sentence = sentence[1:] + next_char

        sys.stdout.write(next_char)
        sys.stdout.flush()

In [43]:

generate_output()

Input some sample text and the model will attempt to complete it in Jane Austen style. Your input is: I walked to the park with Mr Darcy.


Here is your poem: 

I walked to the park with Mr Darcy. her great tolerable of myself friend.” 
  
 “and will have the neglection, with answering the father's distrists of my own bind. i endeglbation.” 
  
 “indire on that heing deprived 
 me, and so itthive went of the time you at last 
 relief. wickham; it must have been my how not has so must desirent, indeed; for in the spenting the earneaghmen of mr. darcy by added, settle of it, must be much sli