In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import re
from nltk.tokenize import RegexpTokenizer

Using TensorFlow backend.


In [2]:
# Read every corpus file and build one lower-cased training string in which
# each kept line is terminated by '\n'.
files = ['../data/shakespeare.txt']
kept_lines = []

for filename in files:
    with open(filename) as f:
        for raw_line in f:
            # Trim surrounding whitespace, then drop every character that is
            # not a word character, apostrophe, hyphen or whitespace.
            line = re.sub(r'[^\w\'\-\s]', '', raw_line.strip())

            # Skip lines that are empty or consist solely of digits.
            if not line or line.isdigit():
                continue
            kept_lines.append(line.lower() + '\n')

text = ''.join(kept_lines)

In [3]:
# Vocabulary: the distinct characters of the corpus in sorted order, plus
# lookup tables in both directions (character -> index, index -> character).
chars = sorted(set(text))
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = dict(enumerate(chars))

In [4]:
# Corpus length and vocabulary size; both are reused by later cells.
n_chars, n_vocab = len(text), len(chars)

print(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

['\n', ' ', "'", '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
Total Characters:  91006
Total Vocab:  30


In [5]:
# The network is trained to write backwards: each input is a window of
# `seq_length` characters read right-to-left, and the target is the character
# that immediately precedes that window in the original text.

seq_length = 40
step = 2

# Start offsets of the windows; the window for offset s covers text[s+1 .. s+seq_length].
window_starts = range(0, n_chars - seq_length, step)

# Reversed window (nearest-to-target character last) and its target character.
sentences = [text[start + seq_length: start: -1] for start in window_starts]
next_chars = [text[start] for start in window_starts]

n_patterns = len(sentences)
print("Total Patterns: ", n_patterns)

Total Patterns:  45483


In [6]:
# Preview the first ten reversed input windows and their target characters.
print(sentences[:10])
print(next_chars[:10])

['esaercni erised ew serutaerc tseriaf mor', 't\nesaercni erised ew serutaerc tseriaf m', 'aht\nesaercni erised ew serutaerc tseriaf', ' taht\nesaercni erised ew serutaerc tseri', 'ht taht\nesaercni erised ew serutaerc tse', 'reht taht\nesaercni erised ew serutaerc t', 'bereht taht\nesaercni erised ew serutaerc', ' ybereht taht\nesaercni erised ew serutae', 'eb ybereht taht\nesaercni erised ew serut', 'uaeb ybereht taht\nesaercni erised ew ser']
['f', 'o', ' ', 'a', 'r', 's', ' ', 'r', 'a', 'u']


In [7]:
# One-hot encode the training data as boolean arrays.
#   X[i, t, k] is True when position t of sentences[i] is the character with index k.
#   y[i, k]    is True when next_chars[i] is the character with index k.
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24, where `dtype=np.bool` raises
# AttributeError. `bool` works on every NumPy version.
X = np.zeros((len(sentences), seq_length, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_to_int[char]] = 1
    y[i, char_to_int[next_chars[i]]] = 1

In [8]:
# Character-level model: two stacked 256-unit LSTM layers, each followed by
# 20% dropout, feeding a softmax classifier over the character vocabulary.
model = Sequential([
    # First LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(256, return_sequences=True, input_shape=(seq_length, len(chars))),
    Dropout(0.2),
    # Second LSTM returns only its final output.
    LSTM(256, return_sequences=False),
    Dropout(0.2),
    Dense(len(chars)),
    Activation('softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam')

In [9]:
# Train for 20 epochs in mini-batches of 64; the returned History object is
# left as the cell's displayed value.
model.fit(X, y, epochs=20, batch_size=64)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2cf1c2027b8>

In [10]:
# Persist the trained weights. To reuse a previous run instead of retraining,
# call model.load_weights(...) with the matching file instead,
# e.g. model.load_weights('../weights/sonnet_25_64_backward.h5').
weights_path = '../weights/sonnet_20_64_backward_nopunc_except.h5'
model.save_weights(weights_path)

In [11]:
def sample(preds, temperature = 1.0):
    """Sample a class index from a probability vector, reshaped by temperature.

    The probabilities are re-weighted as ``softmax(log(preds) / temperature)``
    and a single index is drawn from the result with NumPy's global random
    state.

    Args:
        preds: 1-D array-like of non-negative probabilities (for example one
            row of a softmax output). It is copied, never modified.
        temperature: Non-negative re-weighting factor. Values below 1 sharpen
            the distribution, values above 1 flatten it, and 0 deterministically
            selects the most likely index.

    Returns:
        The selected index into ``preds`` (a NumPy integer).

    Raises:
        ValueError: If ``temperature`` is negative or ``preds`` contains no
            positive entry.
    """
    if temperature < 0:
        raise ValueError('temperature must be non-negative')

    preds = np.asarray(preds).astype('float')
    if not np.any(preds > 0):
        raise ValueError('preds must contain at least one positive probability')

    # Temperature 0 is the limit of an ever sharper distribution: pick the
    # mode directly instead of dividing by zero.
    if temperature == 0:
        return np.argmax(preds)

    # log(0) is -inf, which is the correct logit for an impossible class
    # (exp(-inf) == 0), so only the warning needs to be silenced.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift so the largest logit is 0. This leaves the softmax unchanged but
    # keeps at least one exponent equal to 1, so tiny temperatures can no
    # longer underflow every term to 0 and produce 0/0 = NaN probabilities.
    logits -= np.max(logits)

    exp_preds = np.exp(logits)
    probs = exp_preds / np.sum(exp_preds)

    # A single multinomial draw yields a one-hot row; argmax recovers its index.
    return np.argmax(np.random.multinomial(1, probs, 1))

In [13]:
import sys

char_length = 800  # number of characters to generate for each temperature

for temperature in [0.75, 0.25]:
    print('\n' + 'temperature = ' + str(temperature) + '\n')

    # Seed line; the poem is grown backwards from it, so it ends up as the
    # last line of the generated text.
    generated = 'shall i compare thee to a summer\'s day \n'
    #generated = 'summers                                ' +'\n'

    # The model input is the reversed window, matching the reversed windows
    # it was trained on.
    sentence = generated[::-1]

    sys.stdout.write(generated)
    for _ in range(char_length):
        # One-hot encode the current reversed window as a batch of size one.
        x = np.zeros((1, seq_length, len(chars)))
        char_ids = [char_to_int[ch] for ch in sentence]
        x[0, np.arange(len(char_ids)), char_ids] = 1.

        # Predict the distribution of the preceding character and sample it.
        preds = model.predict(x, verbose=0)[0]
        next_index = sample(preds, temperature)
        next_char = int_to_char[next_index]

        # The new character goes in front of the text, and at the end of the
        # reversed window, whose oldest character is dropped.
        generated = next_char + generated
        sentence = sentence[1:] + next_char

    print(generated)


temperature = 0.75

shall i compare thee to a summer's day 
of this
peccused prive this it my give love's ture bow
and yet beleives all wid words of more
when thimed and mine uspiting of thee hore
if thou my love that i i behold and true
the summored words on love's longer writed
whilst the smore must in the breddoms well
my should loves to be distreds on my friend
and being on enced and pends not enfited
to this shadow pends not give time to suite
for thily like pon of thee bemore
to love to thee i make my love and love
but in the lowks and intureous and looks
and liven in the ourse looks trup kindness
with thise pervention give thou art of thee
mine i must to trespention can to write
o none doth lives etercal winter end
best can sinwed beauty from thee me cold
make me where thou art love is my love to one
to my side to this this thou lov'st come
shall i compare thee to a summer's day 


temperature = 0.25

shall i compare thee to a summer's day 
 loves in my love where alone
sweet l