In [1]:
import numpy as np
from keras.layers import Dense, Activation
from keras.layers.recurrent import SimpleRNN, LSTM, GRU
from keras.models import Sequential

Using TensorFlow backend.


In [2]:
# Read the corpus as bytes, normalise each line, and flatten it into a single
# lower-case ASCII string with one space between the non-empty lines.
with open("alice_in_wonderland.txt", 'rb') as _in:
    # Strip and lower-case the raw bytes first, then decode; bytes that are
    # not valid ASCII are dropped by the "ignore" error handler.
    normalised = (raw.strip().lower().decode("ascii", "ignore") for raw in _in)
    # Lines that are empty after normalisation are discarded.
    lines = [entry for entry in normalised if len(entry) != 0]
text = " ".join(lines)

# The vocabulary is the set of distinct characters that occur in the text.
chars = set(text)
nb_chars = len(chars)

In [3]:
# Lookup tables between characters and their one-hot column index.
# The vocabulary is sorted before enumeration: `chars` is a set of strings,
# and set iteration order can differ between interpreter runs because of
# string hash randomisation. Sorting pins the same character -> index
# assignment on every kernel restart.
char2index = {c: i for i, c in enumerate(sorted(chars))}
# Inverse table, derived from char2index so the two can never disagree.
index2char = {i: c for c, i in char2index.items()}

In [4]:
# Length of each input window (in characters) and the stride between the
# start positions of consecutive windows.
SEQLEN = 10
STEP = 1

# Accumulators for the input windows and the character following each window.
input_chars = []
label_chars = []

In [5]:
# Slide a SEQLEN-character window over the text in strides of STEP. Each
# window is one input sample and the character right after it is its label.
#
# Both lists are rebuilt from scratch here rather than appended to, so
# re-running this cell does not duplicate every sample.
window_starts = range(0, len(text) - SEQLEN, STEP)
input_chars = [text[start: start + SEQLEN] for start in window_starts]
label_chars = [text[start + SEQLEN] for start in window_starts]

In [6]:
# One-hot encode the samples.
#   X[i, j, k] is True when character j of input window i has index k.
#   y[i, k]    is True when the label character of window i has index k.
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((len(input_chars), SEQLEN, nb_chars), dtype=bool)
y = np.zeros((len(input_chars), nb_chars), dtype=bool)
for i, input_char in enumerate(input_chars):
    # Set one column per character position in the window.
    for j, ch in enumerate(input_char):
        X[i, j, char2index[ch]] = 1
    # Set the single column for the character that follows the window.
    y[i, char2index[label_chars[i]]] = 1

In [7]:
# Training hyper-parameters.
BATCH_SIZE = 128                # passed to model.fit as batch_size
HIDDEN_SIZE = 128               # first argument (units) of the SimpleRNN layer
NUM_ITERATIONS = 25             # number of train-then-generate rounds
NUM_EPOCHS_PER_ITERATION = 1    # epochs run by model.fit in each round
NUM_PREDS_PER_EPOCH = 100       # characters generated after each round

In [8]:
# Character-level next-character model: a single SimpleRNN layer reads the
# (SEQLEN, nb_chars) one-hot window and returns only its final output
# (return_sequences=False); a Dense layer with nb_chars units followed by a
# softmax turns that into a distribution over the vocabulary.
model = Sequential([
    SimpleRNN(
        HIDDEN_SIZE,
        input_shape=(SEQLEN, nb_chars),
        return_sequences=False,
        unroll=True,
    ),
    Dense(nb_chars),
    Activation("softmax"),
])
model.compile(optimizer="rmsprop", loss="categorical_crossentropy")








In [9]:
# Alternate between training and sampling: each iteration fits the model for
# NUM_EPOCHS_PER_ITERATION epoch(s), then greedily generates
# NUM_PREDS_PER_EPOCH characters from a randomly chosen training window.
for iteration in range(NUM_ITERATIONS):

    # For each iteration, run the model fitting procedure for a number of epochs.
    print("=" * 50)
    print("Iteration #: %d" % (iteration))
    model.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)

    # Select a random example input sequence to use as the generation seed.
    test_idx = np.random.randint(len(input_chars))
    test_chars = input_chars[test_idx]

    # Echo the seed, then extend it one predicted character at a time.
    print("Generating from seed: %s" % (test_chars))
    print(test_chars, end="")
    for _ in range(NUM_PREDS_PER_EPOCH):

        # One-hot encode the current SEQLEN-character window as a batch of one.
        X_test = np.zeros((1, SEQLEN, nb_chars))
        for j, ch in enumerate(test_chars):
            X_test[0, j, char2index[ch]] = 1

        # Greedy decoding: take the highest-probability character.
        pred = model.predict(X_test, verbose=0)[0]
        y_pred = index2char[np.argmax(pred)]

        # Print the prediction appended to the text generated so far.
        print(y_pred, end="")

        # Slide the window: drop the oldest character and append the
        # prediction as if it were the correct next letter.
        test_chars = test_chars[1:] + y_pred

    # End the generated line once per iteration, so the next iteration's
    # banner starts on a fresh line instead of running into this text.
    print()

Iteration #: 0
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/1
Generating from seed: n. oh, you
Iteration #: 1
Epoch 1/1
Generating from seed: n a very u
Iteration #: 2
Epoch 1/1
Generating from seed: ied to spe
Iteration #: 3
Epoch 1/1
Generating from seed: used to qu
Iteration #: 4
Epoch 1/1
Generating from seed: on. alice 
Iteration #: 5
Epoch 1/1
Generating from seed:  somersaul
Iteration #: 6
Epoch 1/1
Generating from seed: things bet
Iteration #: 7
Epoch 1/1
Generating from seed: rt is the 
Iteration #: 8
Epoch 1/1
Generating from seed: ustody and
Iteration #: 9
Epoch 1/1
Generating from seed: hurt the p
Iteration #: 10
Epoch 1/1
Generating from seed: re is anot
Iteration #: 11
Epoch 1/1
Generating from seed: er sister 
Iteration #: 12
Epoch 1/1
Generating from seed: a, declare
Iteration #: 13
Epoch 1/1
Generating from seed: n changed 
Iteration #: 14
Epoch 1/1
Generating from seed: t high, an
Iteration #: 15
Epoch 1/1
Gene