# Text Generation using LSTM 

In [1]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
import nltk
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


In [2]:
from nltk.corpus import webtext 
nltk.download('webtext')

[nltk_data] Downloading package webtext to
[nltk_data]     C:\Users\69785hsh\AppData\Roaming\nltk_data...
[nltk_data]   Package webtext is already up-to-date!


True

In [3]:
# getting data
text = open('sherlock_homes.txt', 'r').read().lower()

In [4]:
len(text)

561852

# Map chars to integers

In [5]:
chars = sorted(list(set(text)))
print('total chars: ', len(chars))

total chars:  59


In [6]:
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# Split up into subsequences

In [7]:
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

nb sequences: 187271


In [8]:
print(sentences[:3])
print(next_chars[:3])

['ï»¿adventure i. a scandal in bohemia\n\ni.', 'adventure i. a scandal in bohemia\n\ni.\n\nt', 'enture i. a scandal in bohemia\n\ni.\n\nto s']
['\n', 'o', 'h']


In [9]:
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

In [10]:
print(x[:3])
print(y[:3])

[[[False False False ... False False  True]
  [False False False ... False False False]
  [False False False ...  True False False]
  ...
  [ True False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]]

 [[False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]
  ...
  [ True False False ... False False False]
  [ True False False ... False False False]
  [False False False ... False False False]]

 [[False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]
  ...
  [False False False ... False False False]
  [False  True False ... False False False]
  [False False False ... False False False]]]
[[ True False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False Fals

# Building Model

In [11]:
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

Instructions for updating:
Colocations handled automatically by placer.


In [12]:
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

### Helper Functions

In [13]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [14]:
def on_epoch_end(epoch, logs):
    # Function invoked at end of each epoch. Prints generated text.
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

### Defining callbacks

In [15]:
from keras.callbacks import ModelCheckpoint

filepath = "book.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss',
                             verbose=1, save_best_only=True,
                             mode='min')

In [16]:
from keras.callbacks import ReduceLROnPlateau
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2,
                              patience=1, min_lr=0.001)

In [17]:
callbacks = [print_callback, checkpoint, reduce_lr]

### training

In [18]:
model.fit(x, y, batch_size=128, epochs=5, callbacks=callbacks)

Instructions for updating:
Use tf.cast instead.
Epoch 1/5

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "ion, but he half opened his
lids now and"
ion, but he half opened his
lids now and and a crient of the should she was a stare of the should not with the stretter of the stretter with a start of the strang and the strange of the stare and the sheen a confice and the stretter was a started to my served to the stret with her she was a stark with a me the man and were were with a start of the should as the man and read to the seen to me stret of the street and the stretter of the s
----- diversity: 0.5
----- Generating with seed: "ion, but he half opened his
lids now and"
ion, but he half opened his
lids now and we mr. which we was a minnt surned the mat street of her had been me undersartand a was the manked to she are will to as freen and that we well she was to drive your to me bend from all contre, in the drewn of a mere in a last abmetter wi

cause. it was the colonel day it have have been some as is in the place and all part of a confice. with for his firse what was the little change to all
think that the woman, and that i have a sonfice of the bedge-look. when i have not tell us an is perhaps upon the fact of the d
----- diversity: 1.0
----- Generating with seed: " that i was addressing wilhelm gottsreic"
 that i was addressing wilhelm gottsreichet a rited apray to openry and amiugion very keaid and love. a timent and his of the
chill window, it oher that i cut me holserpie a verdar, and give papered double.

"bere-poor me an
ones ago the curmmind stone of the use a yougg him. it on youf at just to a grave. he cousming in drunged at adventure emplies, and stabatoble doubt some
hay unname, though the same's but hen my a noble t. hown oh t
----- diversity: 1.2
----- Generating with seed: " that i was addressing wilhelm gottsreic"
 that i was addressing wilhelm gottsreich hathinghiow. thowche. grompled, we slooge, with a dur

<keras.callbacks.History at 0x1be28b83240>

# Testing the model

In [19]:
def generate_text(length, diversity):
    # Get random starting text
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated = ''
    sentence = text[start_index: start_index + maxlen]
    generated += sentence
    for i in range(length):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char
    return generated

In [20]:
print(generate_text(500, 0.2))

l excuse me if
i say that i cannot see however, was she was a man which he was the strange to the read of the statement that i have been any one of the strange of a bard of the station of the stall mine to the stall with and then the standing of the side of the start of the man which i should be the station of the side of the morning, and we went to the stall would be the statement of the state of the wead, and then he said the man which i have not not been and then i was a start of the station of the station of the cark to make his h
