In [1]:
import numpy as np
import keras
from keras.preprocessing import text, sequence
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from keras.callbacks import TensorBoard, ModelCheckpoint

Using TensorFlow backend.


In [2]:
# Read the whole corpus into memory and lower-case it so the vocabulary
# stays small. The context manager closes the file handle as soon as the
# text has been read (the bare open(...).read() left it open).
# NOTE(review): the platform default encoding is used, as before; pass an
# explicit encoding= if this must behave the same on every OS.
fname = "data/alice.txt"
with open(fname) as corpus_file:
    raw_text = corpus_file.read().lower()

In [3]:
# Number of characters in the lower-cased corpus.
text_len = len(raw_text)
print('Text length:', text_len)

Text length: 163817


In [4]:
# Vocabulary: every distinct character of the corpus, in sorted order, plus
# lookup tables mapping character -> index and index -> character.
charset = sorted(set(raw_text))
int_to_char = dict(enumerate(charset))
char_to_int = {ch: idx for idx, ch in int_to_char.items()}
n_vocab = len(charset)
print('Number of characters:', n_vocab)

Number of characters: 61


In [5]:
# Length (in characters) of each input window; the character immediately
# following a window is used as its prediction target.
max_seq_len = 100

In [6]:
# Slide a window of max_seq_len characters over the corpus one character at
# a time. Each window is an input sequence; the character right after it is
# the target to predict.
seqs = [raw_text[pos:pos + max_seq_len]
        for pos in range(text_len - max_seq_len)]
next_chars = [raw_text[pos + max_seq_len]
              for pos in range(text_len - max_seq_len)]
n_seqs = len(seqs)
print('Number of sequences:', n_seqs)

Number of sequences: 163717


In [7]:
# One-hot encode the training data.
#   X[i, j, k] is True when character j of sequence i has vocabulary index k.
#   y[i, k]    is True when the character following sequence i has index k.
# np.bool_ is NumPy's boolean scalar type; the old `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises
# AttributeError.
X = np.zeros([n_seqs, max_seq_len, n_vocab], dtype=np.bool_)
y = np.zeros([n_seqs, n_vocab], dtype=np.bool_)

for i,seq in enumerate(seqs):
    for j,c in enumerate(seq):
        X[i,j,char_to_int[c]] = 1
    y[i,char_to_int[next_chars[i]]] = 1

In [8]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reshaped by a temperature.

    The probabilities are moved to log space, divided by ``temperature`` and
    re-normalised with a softmax before a single multinomial draw is taken.
    Temperatures below 1 sharpen the distribution, temperatures above 1
    flatten it.

    Args:
        preds: 1-D array-like of non-negative probabilities.
        temperature: Scaling factor for the log-probabilities. A value of
            exactly 0 selects the most likely index deterministically
            (the limit of the softmax as the temperature goes to zero).

    Returns:
        The sampled index (NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero would turn every logit into -inf/nan; the limiting
        # behaviour is a greedy pick.
        return np.argmax(preds)
    # log(0) = -inf is expected for impossible characters; it becomes a
    # probability of exactly 0 again after exp(), so silence the warning.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the largest logit before exponentiating. The softmax is
    # unchanged mathematically, but without the shift a small temperature
    # makes every exp() underflow to 0 and the normalisation becomes 0/0.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [9]:
# Character-level language model: a single 256-unit LSTM reads a one-hot
# encoded window of max_seq_len characters, dropout is applied to its output,
# and a softmax layer scores every character of the vocabulary.
model = Sequential([
    LSTM(256, input_shape=(max_seq_len, n_vocab)),
    Dropout(0.2),
    Dense(n_vocab, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [10]:
# Checkpoint callback: monitors the training loss (mode 'min') with
# save_best_only enabled; epoch number and loss are embedded in the file name.
model_checkpoint = ModelCheckpoint(
    'weights.{epoch:02d}-{loss:.2f}.hdf5',
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)
# TensorBoard callback writing its logs under ./logs.
tensorboard = TensorBoard(
    log_dir='./logs',
    histogram_freq=1,
    write_graph=True,
    write_images=False,
)
callbacks_list = [model_checkpoint, tensorboard]

In [11]:
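# Uncomment the lines below to retrain the model from scratch; leave them
# commented out to reuse the saved weights loaded in the next cell.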

# nb_epoch = 20
# batch_size = 128
# model.fit(X, y, nb_epoch=nb_epoch, batch_size=batch_size, callbacks=callbacks_list)

In [26]:
# Loading model from weight
# Restores previously saved parameters instead of retraining. The file name
# matches the ModelCheckpoint pattern 'weights.{epoch:02d}-{loss:.2f}.hdf5',
# so this is presumably the epoch-19 checkpoint with loss 1.10 -- confirm the
# file is present in the working directory.

weight_fname = 'weights.19-1.10.hdf5'
model.load_weights(weight_fname)
# Compile again with the same loss and optimizer used when the model was built.
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [52]:
# Generate text starting from a random window of the corpus.
#
# A seed window of max_seq_len characters is taken from the text, then the
# model is asked num_iter times for the next character. Each sampled
# character is appended to the window and the oldest one is dropped.

num_iter = 1000
temperature = 0.5
# Draw a plain Python int. With size=1 randint returns a length-1 array, and
# slicing a str with an array raises "only integer scalar arrays can be
# converted to a scalar index" on current NumPy versions.
start = int(np.random.randint(0,text_len-max_seq_len-1))
sentence = raw_text[start:start+max_seq_len]
print(sentence,end='')

for i in range(num_iter):
    # One-hot encode the current window as a batch of one sequence.
    x = np.zeros((1, max_seq_len, n_vocab))
    for j,c in enumerate(sentence):
        x[0,j,char_to_int[c]] = 1
    prediction = model.predict(x)[0]
    sample_idx = int(sample(prediction,temperature))
    next_char = int_to_char[sample_idx]
    # Slide the window: drop the oldest character, append the new one.
    sentence = sentence[1:] + next_char
    print(next_char,end='')
    



 didn’t write it, and they
can’t prove i did: there’s no name signed at the end.’

‘if you didn’t silented that she was and them,’ said the caterpillar.

‘it was a treak butter, the caterpillar walle!’ said the hatter.

‘i dinn’t a deepland it,’ said the queen, and she had sort
of the hatter were all trouse on the little king and head to go, and well be a great dirach a a child: and
passions all
the united stated. in a bogenelf that she said to change the cook to the sound of the going, and the footman in the sound of her head to alice, the queen was getting a brish of the queen, and then was to was so much first, and said to herself and got in a bouth, and alice was a very much in a treacl of the table, but she was quite pleased to himsely in it, and the way of course, the party was the little golden the sumple. i don’t the marke cat would this again, and she was a little is the right to be a little baty on the sound of been and sighing as she could
crowd of the other of the suchated
