In [14]:
from keras.layers import Dense, Activation
from keras.layers.recurrent import SimpleRNN
from keras.models import Sequential
from keras.utils.vis_utils import plot_model
import numpy as np

In [15]:
file = open("AliceWonderland.txt",'rb')

In [16]:
lines=[]
for line in file:
    line = line.strip().lower()              #删除空白符（包括'\n', '\r',  '\t',  ' ') 并转小写
    line = line.decode("ascii","ignore")
    if len(line)==0:
        continue
    lines.append(line)
file.close()
text = " ".join(lines)

In [28]:
chars = set([c for c in text])
nb_chars = len(chars)
char2index = dict((c,i) for i,c in enumerate(chars))
index2char = dict((i,c) for i,c in enumerate(chars))
index2char

{0: '$',
 1: 'e',
 2: '@',
 3: '-',
 4: '3',
 5: '5',
 6: ',',
 7: 'z',
 8: ']',
 9: '9',
 10: ')',
 11: '*',
 12: '.',
 13: '8',
 14: '(',
 15: 't',
 16: 'q',
 17: 'f',
 18: 'l',
 19: ';',
 20: '_',
 21: 'h',
 22: 'o',
 23: 'v',
 24: 'y',
 25: 'c',
 26: 'm',
 27: '6',
 28: 'w',
 29: '0',
 30: 'p',
 31: 'j',
 32: 'k',
 33: '?',
 34: 'u',
 35: '!',
 36: ' ',
 37: 'r',
 38: '[',
 39: '/',
 40: '1',
 41: '#',
 42: 's',
 43: '7',
 44: '2',
 45: 'i',
 46: 'n',
 47: ':',
 48: '%',
 49: '4',
 50: 'd',
 51: 'a',
 52: 'x',
 53: 'g',
 54: 'b'}

In [5]:
SEQLEN=10
STEP=1

In [6]:
input_chars = []
label_chars = []
for i in range(0, len(text)-SEQLEN, STEP):
    input_chars.append(text[i:i + SEQLEN])
    label_chars.append(text[i+SEQLEN])

In [7]:
X = np.zeros((len(input_chars),SEQLEN,nb_chars),dtype=np.bool)

In [8]:
y = np.zeros((len(input_chars), nb_chars), dtype=np.bool)

In [9]:
for i, input_char in enumerate(input_chars):
    for j, ch in enumerate(input_char):
        X[i,j,char2index[ch]] = 1
        y[i, char2index[label_chars[i]]] = 1

In [31]:
HIDDEN_SIZE = 128
BATCH_SIZE = 128
NUM_ITERATIONS = 10
NUM_EPOCHS_PER_ITERATION = 1
NUM_PREDS_PER_EPOCH = 100

In [32]:
model = Sequential()
model.add(SimpleRNN(HIDDEN_SIZE, return_sequences=False,
                   input_shape=(SEQLEN,nb_chars),
                   unroll=True))
model.add(Dense(nb_chars))
model.add(Activation("softmax"))
model.compile(loss = "categorical_crossentropy",optimizer="rmsprop")

In [33]:
for iteration in range(NUM_ITERATIONS):
    print("="*50)
    print("Iteration #:%d" %(iteration))
    model.fit(X,y,batch_size=BATCH_SIZE,epochs=NUM_EPOCHS_PER_ITERATION)
    
    test_idx = np.random.randint(len(input_chars))
    test_chars = input_chars[test_idx]
    print("Generating from seed: %s"%(test_chars))
    print(test_chars, end="")
    for i in range(NUM_PREDS_PER_EPOCH):
        Xtest = np.zeros((1,SEQLEN, nb_chars))
        for i,ch in enumerate(test_chars):
            Xtest[0,i,char2index[ch]]=1
        pred = model.predict(Xtest, verbose=0)[0]    
        ypred = index2char[np.argmax(pred)]
        print(ypred, end="")
        test_chars = test_chars[1:] + ypred
print()
    

Iteration #:0
Epoch 1/1
Generating from seed: tch her he
Iteration #:1
Epoch 1/1
Generating from seed: eal worse 
Iteration #:2
Epoch 1/1
Generating from seed: ittle hous
Iteration #:3
Epoch 1/1
Generating from seed: removed! t
Iteration #:4
Epoch 1/1
Generating from seed: done, thou
Iteration #:5
Epoch 1/1
Generating from seed: ful, beaut
Iteration #:6
Epoch 1/1
Generating from seed: erson of a
Iteration #:7
Epoch 1/1
Generating from seed:  chapter v
Iteration #:8
Epoch 1/1
Generating from seed: here goes 
Iteration #:9
Epoch 1/1
Generating from seed: on going i
on going in a little goon was the dormouse was so a little goon was the dormouse was so a little goon was the 
