# 1. Generative Models for Text

In [1]:
import numpy as np
import pandas as pd
from __future__ import print_function
import keras
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
import numpy as np
import random
import sys
import io
np.random.seed(7)

Using TensorFlow backend.


In [16]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
book1 = io.open('book1.txt', 'r', encoding="utf8").read().lower()
book2 = io.open('book2.txt', 'r', encoding="utf8").read().lower()
book3 = io.open('book3.txt', 'r', encoding="utf8").read().lower()
book4 = io.open('book4.txt', 'r', encoding="utf8").read().lower()

In [3]:
text = book1 + '\n' + book2 + '\n' + book3 + '\n' + book4
print('Text Length', len(text))

Text Length 1593556


### Map Characters to Integers

In [4]:
chars = sorted(list(set(text)))
print('Total Unique Characters', len(chars))

Total Unique Characters 99


In [5]:
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

### Split up into subsequences

In [6]:
maxlen = 99
step = 1
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i:i + maxlen])
    next_chars.append(text[i + maxlen])
print('No. of Sequences', len(sentences))

No. of Sequences 1593457


In [7]:
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

### Building Model

In [8]:
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

In [9]:
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

### Helper Functions

In [10]:
def sample(preds, temperature):
    #Helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [11]:
def on_epoch_end(epoch, logs):
    #Function invoked at the end of each epoch. Prints generated Text.
    
    start_index = random.randint(0, len(text) - maxlen - 1)
    
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        generated = ''
        sentence = text[start_index : start_index + maxlen]
        generated += sentence
        
        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1
            
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            
            generated += next_char
            sentence = sentence[1:] + next_char
print_callback = LambdaCallback(on_epoch_end = on_epoch_end)

In [12]:
from keras.callbacks import ModelCheckpoint

filepath = 'weights.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='loss',
                            verbose=0, save_best_only=True, 
                            mode='min')

In [23]:
def generate_text(length, diversity=0.5):
    #Get random starting Text
    generated = ''
    sentence = 'There are those who take mental phenomena naively, just as they would physical phenomena. This scho'.lower()
    generated += sentence
    
    for i in range(length):
        x_pred = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_indices[char]] = 1.
        
        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, diversity)
        next_char = indices_char[next_index]
        
        generated += next_char
        sentence = sentence[1:] + next_char
    return generated

In [14]:
from keras.callbacks import ReduceLROnPlateau

reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2,
                             patience=1, min_lr=0.001)

In [15]:
callbacks = [print_callback, checkpoint, reduce_lr]

In [17]:
model.fit(x, y, batch_size=1024, epochs=30, callbacks=callbacks)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f593a36ee90>

In [18]:
model.save('TextGenerator.h5')

### Output Results

In [28]:
print(generate_text(1000))

there are those who take mental phenomena naively, just as they would physical phenomena. this school event of entities of physics when it is only to be just as we can be completed in the end of a property of the problems of the things to be accounted by external wholly important belong to the self-evidence of the sense of sense-data of the proposition of the attempt to the distinction of the sense which we may have been defined by the advances of sense-data of the subject of which the objective which we have allowed to be the same and proposition of the two of the whole, while we have no different perspectives that he is the sense of the senses of what is inferred from the matter of a sensible to the whole of the way and a sensation of the cause of the
sense who have a result of an extent of the law of things which is a relation of many with what is the previous sense-data when it is the physical objects and particular who were to the sense of the sense of the whole of the sense of th

*End of Question - 1. Next Question attached in* **Sub7-Notebook_K=4.ipynb**