In [30]:
import keras
import numpy as np
from keras import layers
import random
import sys
from keras.callbacks import Callback

In [31]:
path = '/home/Bureaux/Project/Keras-DL-server/data/nietzsche.txt'
text = open(path).read().lower()
print('Corpus length:', len(text))

Corpus length: 600893


In [32]:
# Length of extracted character sequences
# 提取 60 个字符组成的序列
maxlen = 60

# We sample a new sequence every `step` characters
# 每 3 个字符采样一个新序列
step = 3

# This holds our extracted sequences（保存所提取的序列）
sentences = []

# This holds the targets (the follow-up characters)
# 保存目标（即下一个字符）
next_chars = []

for i in range(0, len(text) - maxlen, step):
    if i < 100:
        print("[sentences]", text[i:i + maxlen])
        print("[next]", text[i + maxlen])
    sentences.append(text[i:i + maxlen])
    next_chars.append(text[i + maxlen])

[sentences] preface


supposing that truth is a woman--what then? is the
[next] r
[sentences] face


supposing that truth is a woman--what then? is there 
[next] n
[sentences] e


supposing that truth is a woman--what then? is there not
[next]  
[sentences] 
supposing that truth is a woman--what then? is there not gr
[next] o
[sentences] pposing that truth is a woman--what then? is there not groun
[next] d
[sentences] sing that truth is a woman--what then? is there not ground
f
[next] o
[sentences] g that truth is a woman--what then? is there not ground
for 
[next] s
[sentences] hat truth is a woman--what then? is there not ground
for sus
[next] p
[sentences]  truth is a woman--what then? is there not ground
for suspec
[next] t
[sentences] uth is a woman--what then? is there not ground
for suspectin
[next] g
[sentences]  is a woman--what then? is there not ground
for suspecting t
[next] h
[sentences]  a woman--what then? is there not ground
for suspecting that
[next]  
[sentences] woma

In [33]:
print('Number of sequences:', len(sentences))
chars = sorted(list(set(text)))
print(chars)
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to their index in `chars`
# 一个字典，将唯一字符映射为它在列表 chars 中的索引

Number of sequences: 200278
['\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '?', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ä', 'æ', 'é', 'ë']
Unique characters: 57


In [34]:
char_indices = dict((char, chars.index(char)) for char in chars)
print(char_indices)

{'\n': 0, ' ': 1, '!': 2, '"': 3, "'": 4, '(': 5, ')': 6, ',': 7, '-': 8, '.': 9, '0': 10, '1': 11, '2': 12, '3': 13, '4': 14, '5': 15, '6': 16, '7': 17, '8': 18, '9': 19, ':': 20, ';': 21, '=': 22, '?': 23, '[': 24, ']': 25, '_': 26, 'a': 27, 'b': 28, 'c': 29, 'd': 30, 'e': 31, 'f': 32, 'g': 33, 'h': 34, 'i': 35, 'j': 36, 'k': 37, 'l': 38, 'm': 39, 'n': 40, 'o': 41, 'p': 42, 'q': 43, 'r': 44, 's': 45, 't': 46, 'u': 47, 'v': 48, 'w': 49, 'x': 50, 'y': 51, 'z': 52, 'ä': 53, 'æ': 54, 'é': 55, 'ë': 56}


In [35]:
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1  #ont hot
    y[i, char_indices[next_chars[i]]] = 1  #ont hot

Vectorization...


In [36]:
model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 128)               95232     
_________________________________________________________________
dense_3 (Dense)              (None, 57)                7353      
Total params: 102,585
Trainable params: 102,585
Non-trainable params: 0
_________________________________________________________________


In [37]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


In [38]:
class ModelHistory(Callback):
    def __init__(self, epochs):
        self.epochs = epochs

    def on_train_begin(self, logs={}):
        self.losses = []
        print('start train...')

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        #这个地方要加1
        process = float(epoch + 1) / float(self.epochs) * 100
        print("[训练进度]" + str(process) + "loss：" + str(logs.get('loss')))
        print("本轮预测结果:")
        self._get_predict(self.model)

    def on_train_end(self, logs={}):
        print("end train...")
        print(self.losses[len(self.losses) - 1])

    def _get_predict(self, model):
        start_index = random.randint(0, len(text) - maxlen - 1)
        print("[start_index]", start_index)
        generated_text = text[start_index: start_index + maxlen]
        print("[generated_text]", generated_text)

        for temperature in [0.2, 0.5, 1.0, 1.2]:
            print('------ temperature:', temperature)
            sys.stdout.write(generated_text)

            for i in range(300):
                sampled = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(generated_text):
                    sampled[0, t, char_indices[char]] = 1.
                preds = model.predict(sampled, verbose=0)[0]
                next_index = sample(preds, temperature)
                next_char = chars[next_index]

                generated_text += next_char
                generated_text = generated_text[1:]

                sys.stdout.write(next_char)
                sys.stdout.flush()
            print()

In [None]:
# for epoch in range(1, 60):
#     print('epoch', epoch)
# Fit the model for 1 epoch on the available training data
history = ModelHistory(60)
save_model = keras.callbacks.ModelCheckpoint(
    "/home/Bureaux/Project/Keras-DL-server/models/gen_weights.{epoch:02d}.hdf5",
    monitor='loss', verbose=0, save_best_only=True, save_weights_only=True, mode='auto', period=1)
model.fit(x, y, batch_size=256, epochs=60, callbacks=[history,save_model])

# start_index = random.randint(0, len(text) - maxlen - 1)
# print("[start_index]",start_index)
# generated_text = text[start_index: start_index + maxlen]
# print("[generated_text]",generated_text)
#
# for temperature in [0.2, 0.5, 1.0, 1.2]:
#     print('------ temperature:', temperature)
#     sys.stdout.write(generated_text)
#
#     for i in range(300):
#         sampled = np.zeros((1, maxlen, len(chars)))
#         for t, char in enumerate(generated_text):
#             sampled[0, t, char_indices[char]] = 1.
#         preds=model.predict(sampled,verbose=0)[0]
#         next_index = sample(preds, temperature)
#         next_char = chars[next_index]
#
#         generated_text += next_char
#         generated_text = generated_text[1:]
#
#         sys.stdout.write(next_char)
#         sys.stdout.flush()
#     print()

start train...
Epoch 1/60
[训练进度]1.6666666666666667loss：2.0826249550225024
本轮预测结果:
[start_index] 461429
[generated_text] the same
as those of the average beholder of cameos: people 
------ temperature: 0.2
the same
as those of the average beholder of cameos: people and the distine that the detine the distine that the spirit and the down to be and the comple and the scirness and the distrance and the decertion to the spering to be not intellong to be and the dount to be and the spirit and the spirits of the domen and the comple something to the spirits and the 
------ temperature: 0.5
f the domen and the comple something to the spirits and the spirits of the sore and the doble to the proous himself with the suined so caring for the intrifice of the stroul and capustable, forture to the under the sympally the does and to the sunce and will the were been and complet to interough of the speren and make dirgard to be a mare and good intelligi
------ temperature: 1.0
 the speren and make dirg

  This is separate from the ipykernel package so we can avoid doing imports until


ter. the former responcess, one case of self
all
the
impature the intellivors
that cames than one. it is perhaps is; though to make the 
but the whole peoplty, the surideness" one hy fact--and mostnal hugables of
the fact, althehing-satisfaction religious
------ temperature: 1.2
stnal hugables of
the fact, althehing-satisfaction religious, worlded differ ousure!--there is drother, has been tweces, our ? west- also" has
naturaling, frespoments" with it?--ifthingzed emoors is carm a sense of the perficried. then, therefore, arding, doold by seeks some minday;--whoe your own shaltific enflien oftygletite
so its oveutene and 'suizes
mig
Epoch 16/60
[训练进度]26.666666666666668loss：1.3139834350862443
本轮预测结果:
[start_index] 50100
[generated_text] tself
hitherto, the will to knowledge on the foundation of a
------ temperature: 0.2
tself
hitherto, the will to knowledge on the foundation of a man the more sense of the sense of the sense of the ear the deeperness of the conscience and consequence of 