# 1. Generative Models for Text

In [1]:
import numpy as np
import pandas as pd
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
import numpy as np
import random
import sys
import io
# Seed every random source this notebook draws from. NumPy's global generator
# drives `np.random.multinomial` in `sample`; the standard-library `random`
# module drives `random.randint` in `on_epoch_end`. Seeding only NumPy leaves
# the latter different on every run.
np.random.seed(7)
random.seed(7)

Using TensorFlow backend.


In [63]:
def _read_book(path, limit=10000):
    """Return the first `limit` characters of the lower-cased text in `path`.

    The file is decoded as UTF-8 and lower-cased *before* it is truncated,
    which is the order the notebook has always used.
    """
    # A context manager closes the file handle as soon as the text is read,
    # instead of leaving four open handles for the garbage collector.
    with open(path, 'r', encoding="utf8") as handle:
        return handle.read().lower()[:limit]


book1 = _read_book('book1.txt')
book2 = _read_book('book2.txt')
book3 = _read_book('book3.txt')
book4 = _read_book('book4.txt')

In [64]:
# Build one training corpus: the four excerpts separated by single newlines.
text = '\n'.join((book1, book2, book3, book4))
print('Text Length', len(text))

Text Length 40003


### Map Characters to Integers

In [65]:
# Vocabulary: every distinct character of the corpus, in sorted order so the
# character <-> index mapping is the same on every run.
chars = sorted(set(text))
print('Total Unique Characters', len(chars))

Total Unique Characters 55


In [66]:
# Forward lookup (character -> integer id) and its inverse (id -> character),
# both derived from the position of each character in `chars`.
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = dict(enumerate(chars))

### Split Up into Subsequences

In [67]:
# Slide a window of `maxlen` characters over the corpus, advancing `step`
# characters at a time. Each window is one training input; the character that
# immediately follows the window is its target.
maxlen = 99
step = 1
sentences = [text[start:start + maxlen]
             for start in range(0, len(text) - maxlen, step)]
next_chars = [text[start + maxlen]
              for start in range(0, len(text) - maxlen, step)]
print('No. of Sequences', len(sentences))

No. of Sequences 39904


In [68]:
# One-hot encode the training data.
#   x[i, t, k] is True when character t of window i has vocabulary index k.
#   y[i, k]    is True when the character following window i has index k.
# `dtype=bool` replaces `np.bool`: that alias of the builtin was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

### Building Model

In [69]:
# Character-level network: one 128-unit LSTM reads a one-hot window of
# `maxlen` characters, a dense layer produces one score per vocabulary entry,
# and a softmax turns those scores into a next-character distribution.
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

In [70]:
# RMSprop with a learning rate of 0.01. Categorical cross-entropy matches the
# one-hot targets and the softmax output layer.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

### Helper Functions

In [71]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``exp(log(p) / temperature)`` and
    renormalised, so a temperature below 1 sharpens the distribution towards
    its most likely entries and a temperature above 1 flattens it.

    Parameters
    ----------
    preds : array-like of float
        Probabilities, one per candidate index.
    temperature : float, optional
        Strictly positive scaling factor. Defaults to 1.0, which leaves the
        distribution unchanged.

    Returns
    -------
    int
        Index drawn with a single `np.random.multinomial` trial.

    Raises
    ------
    ValueError
        If `temperature` is not strictly positive.
    """
    # A zero temperature divides by zero and produces inf/NaN probabilities;
    # fail with a clear message instead of passing them on to the sampler.
    if temperature <= 0:
        raise ValueError('temperature must be positive, got %r' % (temperature,))

    # float64 keeps the renormalised probabilities accurate enough for
    # `np.random.multinomial`, which rejects inputs that sum to more than 1.
    preds = np.asarray(preds).astype('float64')

    # log(0) is -inf and exp(-inf) is 0, so zero-probability entries stay at
    # zero; only the harmless divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [82]:
def on_epoch_end(epoch, logs, verbose=False):
    """Epoch-end hook that can print text generated by the current model.

    Previously every output statement in this function was commented out, so
    each epoch ran 4 x 400 `model.predict` calls and threw the text away.
    Output is now controlled by `verbose`, and the generation work is skipped
    entirely when nothing would be shown.

    Parameters
    ----------
    epoch : int
        Epoch index supplied by the callback; used only in the printed header.
    logs : dict
        Metrics supplied by the callback; not used.
    verbose : bool, optional
        When False (the default) the function returns immediately and the
        training log stays quiet. When True, a random window of `maxlen`
        characters is taken from `text` and 400 characters are generated and
        written to stdout for each diversity in 0.2, 0.5, 1.0 and 1.2.
    """
    if not verbose:
        # Nothing to report, so do not spend time on predictions.
        return

    print('\n----- Generating Text after Epoch: %d' % epoch)

    # The same seed window is reused for every diversity so the outputs can
    # be compared side by side.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed = text[start_index:start_index + maxlen]

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- Diversity:', diversity)
        print('----- Generating with seed:"' + seed + '"')

        sentence = seed
        sys.stdout.write(sentence)

        for _ in range(400):
            # One-hot encode the current window for the model.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Slide the window forward by one character.
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print('\n')


# The callback invokes `on_epoch_end(epoch, logs)`, i.e. in quiet mode. To see
# samples during training, wrap the hook instead:
#   LambdaCallback(on_epoch_end=lambda e, l: on_epoch_end(e, l, verbose=True))
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

In [83]:
from keras.callbacks import ModelCheckpoint

# Checkpointing: monitor the training loss and write the model to
# `weights.hdf5` only when that loss reaches a new minimum, logging each save.
filepath = 'weights.hdf5'
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [84]:
from keras.callbacks import ReduceLROnPlateau

# Learning-rate schedule driven by the training loss: scale the rate by 0.2
# after one epoch without improvement, never going below 0.001.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=1,
    min_lr=0.001,
)

In [85]:
# Callbacks passed to `model.fit`: the epoch-end text hook, the checkpoint
# writer and the learning-rate schedule.
callbacks = [
    print_callback,
    checkpoint,
    reduce_lr,
]

In [86]:
# Train for 5 epochs in mini-batches of 20 windows. The call stays the last
# expression of the cell, so the returned History object is still displayed.
model.fit(
    x,
    y,
    epochs=5,
    batch_size=20,
    callbacks=callbacks,
)

Epoch 1/5

Epoch 00001: loss improved from inf to 1.53596, saving model to weights.hdf5
Epoch 2/5

Epoch 00002: loss improved from 1.53596 to 1.51318, saving model to weights.hdf5
Epoch 3/5

Epoch 00003: loss improved from 1.51318 to 1.49850, saving model to weights.hdf5
Epoch 4/5

Epoch 00004: loss improved from 1.49850 to 1.48852, saving model to weights.hdf5
Epoch 5/5

Epoch 00005: loss improved from 1.48852 to 1.46940, saving model to weights.hdf5


<keras.callbacks.History at 0x2c2193a1f98>

In [97]:
def generate_text(length, diversity, seed=None):
    """Generate `length` characters with the trained model, starting from a seed.

    Parameters
    ----------
    length : int
        Number of characters to generate after the seed.
    diversity : float
        Sampling temperature forwarded to `sample`.
    seed : str, optional
        Text that primes the model. It is lower-cased, because the corpus was
        lower-cased before the vocabulary was built. It must contain at least
        `maxlen` characters, all present in the vocabulary; when it is longer,
        only its last `maxlen` characters are fed to the model. Defaults to
        the 99-character sentence this notebook has always used.

    Returns
    -------
    str
        The lower-cased seed followed by the generated characters.

    Raises
    ------
    ValueError
        If the seed is shorter than `maxlen` or contains characters that are
        not in the vocabulary.
    """
    if seed is None:
        seed = 'There are those who take mental phenomena naively, just as they would physical phenomena. This scho'
    seed = seed.lower()

    # Report bad seeds up front instead of failing with a bare KeyError or
    # IndexError in the middle of the encoding loop.
    unknown = sorted(set(seed) - set(char_indices))
    if unknown:
        raise ValueError(
            'seed contains characters missing from the vocabulary: %r' % (unknown,))
    if len(seed) < maxlen:
        raise ValueError(
            'seed must contain at least %d characters, got %d' % (maxlen, len(seed)))

    # The model input is always exactly `maxlen` characters wide.
    sentence = seed[-maxlen:]

    # Collect the pieces in a list and join once; repeated `str +=` copies the
    # whole string on every step.
    pieces = [seed]
    for _ in range(length):
        x_pred = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_indices[char]] = 1.

        preds = model.predict(x_pred, verbose=0)[0]
        next_char = indices_char[sample(preds, diversity)]

        pieces.append(next_char)
        # Slide the window forward by one character.
        sentence = sentence[1:] + next_char
    return ''.join(pieces)

In [98]:
# Generate 1000 characters at a low temperature (0.2) and show the result.
print(generate_text(length=1000, diversity=0.2))

there are those who take mental phenomena naively, just as they would physical phenomena. this schoold the table and the table and considert to a strong and the table of the table of the same to and the same to show the same to the same was the same that the same was in the same that the table of the sense of and the same to a more are and the table of the table and man that the table of the same was a statement and the same to a certain the same and and the same was we see that the table is the same and the same to the same was in the table and the same was the table and the table and the colour that the table is the same with we see that the table of the table of the table and any of the sense and and at the same man consided in the same was an other and and the same to a colour the same to a strange and the same to a sense and and the science of the sense of the same world of the same most to a sense and and at the same man and the are in the same mover and any that the table of the