In [1]:
from __future__ import division, print_function

import numpy as np
from keras import backend as K
from keras.layers import Embedding, Dropout, Activation
from keras.layers import LSTM, TimeDistributed, Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing import sequence

Using Theano backend.
Using gpu device 0: GeForce GTX 850M (CNMeM is disabled, cuDNN 5105)


In [2]:
# Read the corpus: one sentence per line, stored without its line terminator.
# The `with` block closes the file even if reading fails part-way through.
with open('data/sentences.txt', 'r') as fd:
    # rstrip('\n') rather than line[:-1]: a final line that has no trailing
    # newline keeps its last character instead of losing it.
    sentences = [line.rstrip('\n') for line in fd]

In [3]:
# Report how many sentences were loaded and their mean length in characters.
n_sentences = len(sentences)
sentence_lengths = [len(s) for s in sentences]
print('Number of sentences:', n_sentences)
print('Average sentence length: ', np.sum(sentence_lengths) / n_sentences)

Number of sentences: 29878
Average sentence length:  41.7722404445


In [4]:
# Flatten the sentences into one long string, separated by single spaces.
separator = ' '
corpus = separator.join(sentences)
print('Corpus length:', len(corpus))

Corpus length: 1277948


In [5]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(corpus))
vocab_size = len(chars)
print('Total chars:', vocab_size)
print(' '.join(chars))

Total chars: 72
  ! " ' , . 0 1 2 3 4 5 6 7 8 9 : ; ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z


In [6]:
# Two-way lookup between characters and their position in `chars`.
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [7]:
# Encode the whole corpus as a list of vocabulary indices.
corpus_idx = list(map(char_indices.__getitem__, corpus))

# Preprocessing

In [8]:
# Window length (in characters) of each training sample, and the containers
# that will hold the input windows (x) and target windows (y).
maxlen = 40
x, y = [], []

In [9]:
# Build every window of `maxlen` indices (x) together with the same window
# shifted one position to the right (y).
# Fresh lists are assigned instead of appending to the existing ones, so
# re-running this cell does not duplicate every sample.
window_starts = range(0, len(corpus_idx) - maxlen)
x = [corpus_idx[i:i+maxlen] for i in window_starts]
y = [corpus_idx[i+1:i+maxlen+1] for i in window_starts]
print('x:', len(x), 'y:', len(y))

x: 1277908 y: 1277908


In [10]:
# Convert the windows to 2-D arrays with one window per row.
# A single np.array call replaces wrapping each row in its own array and
# concatenating them, which allocated one small array per sample.
# The final window is still dropped ([:-1]), exactly as before, so the
# resulting shapes are unchanged.
# NOTE(review): every window built above is already full length, so the drop
# looks unnecessary, and re-running this cell removes one more row each time.
x = np.array(x[:-1])
y = np.array(y[:-1])
print('x:', x.shape, 'y:', y.shape)

x: (1277907, 40) y: (1277907, 40)


In [11]:
# Show one input window and, on the next line, its one-step-shifted target.
print(x[1], y[1], sep='\n')

[46 64 70  4  0 47 60 70  5  0 41 53 46 65  0 49 60  0 70 60 66  0 50 69 61
 50 48 65 18  0 38 53 50 70  3 63 50  0 64 46]
[64 70  4  0 47 60 70  5  0 41 53 46 65  0 49 60  0 70 60 66  0 50 69 61 50
 48 65 18  0 38 53 50 70  3 63 50  0 64 46 67]


# Model

In [12]:
# Settings shared by both recurrent layers.
lstm_options = dict(
    return_sequences=True,
    dropout_U=0.2,
    dropout_W=0.2,
    consume_less='gpu',
)

# Character-level model: a 24-dimensional embedding of the character indices,
# two 256-unit LSTMs that return the full sequence (with dropout between
# them), then a per-timestep dense projection to the vocabulary and a softmax.
model = Sequential([
    Embedding(vocab_size, 24, input_length=maxlen),
    LSTM(256, **lstm_options),
    Dropout(0.2),
    LSTM(256, **lstm_options),
    TimeDistributed(Dense(vocab_size)),
    Activation('softmax'),
])

In [13]:
# The targets are integer character indices (not one-hot vectors), which is
# why the sparse variant of categorical cross-entropy is used.
model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy')

In [14]:
# Update the learning rate through the backend so the optimizer keeps its
# backend variable. Assigning a plain float to `model.optimizer.lr` would
# replace that variable; such an assignment presumably only takes effect if it
# happens before the training function is first built, and it breaks code that
# reads or writes the rate via K.get_value / K.set_value.
K.set_value(model.optimizer.lr, 0.001)

In [15]:
def print_example(start, n_char, maxlen):
    """Sample ``n_char`` characters from ``model``, seeded with ``start``, and print the text.

    Relies on the notebook globals ``model``, ``chars`` and ``char_indices``;
    ``char_indices`` must map each entry of ``chars`` to its position.

    Args:
        start: Seed text. Every character must be a key of ``char_indices``,
            otherwise a ``KeyError`` is raised.
        n_char: Number of characters to generate.
        maxlen: Length of the index window handed to the model at each step.

    Returns:
        None. The seed followed by the generated characters is printed.
    """
    # Encode the seed once and extend the index history as characters are
    # sampled, instead of re-encoding the whole growing string on every step.
    history = [char_indices[c] for c in start]
    for _ in range(n_char):
        # truncating='pre' keeps only the last `maxlen` indices anyway, so
        # older ones are discarded to keep the history bounded.
        if len(history) > maxlen:
            del history[:-maxlen]
        z = sequence.pad_sequences([history], maxlen=maxlen, truncating='pre')
        # Predicted distribution at the last timestep of the window.
        preds = model.predict(z, verbose=0)[0][-1]
        # Sample an index (not a character) so it can be appended to the
        # history directly; the character is looked up from `chars`.
        next_idx = int(np.random.choice(len(chars), p=preds))
        history.append(next_idx)
        start = start + chars[next_idx]
    print(start)

In [16]:
def train(batch_size=64, nb_epoch=1):
    """Fit the notebook's global ``model`` on the global ``x`` / ``y`` arrays.

    Args:
        batch_size: Batch size passed to ``model.fit`` (default 64, the value
            that used to be hard-coded).
        nb_epoch: Number of epochs per call (default 1, the value that used to
            be hard-coded).

    Returns:
        None.
    """
    model.fit(
        x,
        # The targets get a trailing axis of size 1 before fitting
        # (presumably what the sparse cross-entropy loss expects -- confirm).
        np.expand_dims(y, -1),
        batch_size=batch_size,
        nb_epoch=nb_epoch,
        shuffle=True
    )

In [17]:
# Five training rounds; after each one, sample 400 characters from a fixed
# seed to see how the generated text evolves.
for step in range(1, 6):
    print('STEP', step)
    train()
    print_example('Winter is coming.', 400, maxlen)
    print()

STEP 1
Epoch 1/1
Winter is coming. Wait since the best of clamper have been dream to be happy, at Jon Snow, they? You're done in the end of my fighting. What did you die flew will think I can be not a nage and's word to do this cell. This most of my sons religions. My mother's a king as you've been in the death is time. She didn't you remember. What's gone. I'm long how will be the world, how did I efjisted lying. Otherwing who gu

STEP 2
Epoch 1/1
Winter is coming. She says come and the saddown is dead. Sometimes, it was the Master Commander of our own grandsonn. He was back. My dragonslop! Will you bloody lotting to run these long stranger, like, but uncle Makgaery. You recognize the name is or I has enough with Robert lick other table missions. What you'll have a good after good to hear a treason? Here's a senister on the King boy the hand. My lord. I dem

STEP 3
Epoch 1/1
Winter is coming. One words matter. Oh, a realm, they are, now I always always been sift. This is drunk, too. I

In [18]:
# Persist the current weights so training can be resumed or sampled from later.
model.save_weights(filepath='data/model_5.hf5')

In [19]:
# Ten further training rounds, again sampling 400 characters from the same
# seed after each one.
for step in range(1, 11):
    print('STEP', step)
    train()
    print_example('Winter is coming.', 400, maxlen)
    print()

STEP 1
Epoch 1/1
Winter is coming. My mother's will be? I will be kings cried. A girl and you dreaming the Numh little brother's place? Save me to mein it brother. He's been doing for defends, Princess. I have to steal now room. Do you know why? What so happy. The Harpy have saying it. Noble mine of his way. You're not the mittle birth more than you all this woman of the right. I must make me a day if he trying in my experience wi

STEP 2
Epoch 1/1
Winter is coming. The Wall is my last lords of my bear in a crime. Very young time, lady! You march us when this is my son in many arms. People bitch. I must tell them this woman. You had a whore. Cut him and depend them all any man. We have to know part of Master Aegon's old hands blood of Lord Tywin Lannister Aegon in him. Why are you swallowing to Castle Black, don't think I all right for each other. Has caved 

STEP 3
Epoch 1/1
Winter is coming. Stannis saw it through them. He's ready good to the world together right. Clear the sellsword

In [20]:
# Persist the weights reached after the additional training rounds.
model.save_weights(filepath='data/model_15.hf5')