In [21]:
from __future__ import print_function
from keras.layers import Dense, SimpleRNN, Activation
from keras.models import Sequential
import matplotlib.pyplot as plt
import numpy as np

In [22]:
# Load the corpus: read the book as raw bytes, normalise each line, and join
# everything into one long lowercase ASCII string.
lines = []
# The context manager guarantees the file handle is released even if reading
# or decoding raises part-way through.
with open('Carroll Lewis. Alices adventures in Wonderland - BooksCafe.Net.txt', 'rb') as fin:
    for line in fin:
        # Trim surrounding whitespace and lowercase while still in bytes form.
        line = line.strip().lower()
        # Drop any byte that is not plain ASCII instead of failing on it.
        line = line.decode('ascii', 'ignore')
        # Skip lines that end up empty so they do not add extra spaces to the text.
        if len(line) == 0:
            continue
        lines.append(line)
# Single space between the retained lines.
text = ' '.join(lines)

In [23]:
# Build the character vocabulary and the two lookup tables used for one-hot
# encoding (char -> index) and for decoding predictions (index -> char).
#
# The vocabulary is sorted so that the mapping is identical on every run:
# iteration order of a set of strings depends on string hashing, which Python
# randomises per interpreter session, so an unsorted set would assign different
# indices to the same character after a kernel restart.
chars = sorted(set(text))
nb_chars = len(chars)
char2index = {c: i for i, c in enumerate(chars)}
index2char = dict(enumerate(chars))

In [24]:
# Length (in characters) of each input window.
SEQLEN = 10
# Distance between the starting positions of consecutive windows.
STEP = 1

# Slide a SEQLEN-wide window across the text. Each window becomes one input
# sample, and the character immediately following it becomes its label.
window_starts = range(0, len(text) - SEQLEN, STEP)
input_chars = [text[start:start + SEQLEN] for start in window_starts]
label_chars = [text[start + SEQLEN] for start in window_starts]

In [25]:
# One-hot encode the samples.
#   X[sample, position, char_index] is set for every character of every window.
#   y[sample, char_index] is set for the character that follows the window.
num_samples = len(input_chars)
X = np.zeros((num_samples, SEQLEN, nb_chars), dtype=np.bool_)
y = np.zeros((num_samples, nb_chars), dtype=np.bool_)
for sample_idx, (window, target) in enumerate(zip(input_chars, label_chars)):
    for pos, symbol in enumerate(window):
        X[sample_idx, pos, char2index[symbol]] = 1
    y[sample_idx, char2index[target]] = 1

In [26]:
# Hyperparameters for the model and the train/sample loop.

# Number of units passed to the SimpleRNN layer.
HIDDEN_SIZE = 128
# Batch size passed to model.fit.
BATCH_SIZE = 128
# How many train-then-generate rounds the outer loop performs.
NUM_ITERATIONS = 25
# Epochs passed to model.fit in each round.
NUM_EPOCHS_PER_ITERATION = 1
# Number of characters generated from the seed after each round.
NUM_PREDS_PER_EPOCH = 100

In [34]:
# Character-level model: a single SimpleRNN layer reads a one-hot encoded
# window of shape (SEQLEN, nb_chars) and returns only its final output
# (return_sequences=False); a Dense layer followed by softmax turns that into
# a distribution over the nb_chars possible next characters.
model = Sequential([
    SimpleRNN(
        HIDDEN_SIZE,
        return_sequences=False,
        input_shape=(SEQLEN, nb_chars),
        unroll=True,
    ),
    Dense(nb_chars),
    Activation('softmax'),
])
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

In [35]:
# Alternate between training and sampling: after every round of fitting,
# generate NUM_PREDS_PER_EPOCH characters from a random seed window so the
# progress of the model can be judged by eye.
for iteration in range(NUM_ITERATIONS):
    print('=' * 50)
    print('Iteration #: %d' % iteration)
    model.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)

    # Use a randomly chosen training window as the generation seed.
    test_idx = np.random.randint(len(input_chars))
    test_chars = input_chars[test_idx]
    print('Generating from seed: %s' % test_chars)
    print(test_chars, end='')

    for _ in range(NUM_PREDS_PER_EPOCH):
        # One-hot encode the current window as a batch containing one sample.
        Xtest = np.zeros((1, SEQLEN, nb_chars))
        for pos, ch in enumerate(test_chars):
            Xtest[0, pos, char2index[ch]] = 1

        # Greedy decoding: take the most probable next character.
        pred = model.predict(Xtest, verbose=0)[0]
        ypred = index2char[np.argmax(pred)]
        print(ypred, end='')

        # Slide the window forward: drop the oldest character and append the
        # one that was just predicted.
        test_chars = test_chars[1:] + ypred
    print()

Iteration #: 0
[1m1108/1108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2.5745
Generating from seed: l must hav
l must have the soon the sald the sald the sald the sald the sald the sald the sald the sald the sald the sald
Iteration #: 1
[1m1108/1108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2.0621
Generating from seed: ll make yo
ll make you don the mad the was see to the was see to the was see to the was see to the was see to the was see
Iteration #: 2
[1m1108/1108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.9480
Generating from seed: own busine
own busine the said the dong the said the dong the said the dong the said the dong the said the dong the said 
Iteration #: 3
[1m1108/1108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.8681
Generating from seed:  toast,) s
 toast,) she had the cane the came of the came of the came of the came of the came of the came of the came 