In [2]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# 讀取唐詩檔案，將詩句取出
import json
poetry = []
with open('poet.tang.0.json' , 'r') as json_read:
    jf = json.load(json_read)
for i in range(0,len(jf)):
    line = jf[i]["paragraphs"]
    poetry.append(line)
    
with open('poet.tang.1000.json' , 'r') as json_read:
    jf = json.load(json_read)
for i in range(0,len(jf)):
    line = jf[i]["paragraphs"]
    poetry.append(line)
    
# 加上結尾記號
#for i in range(len(poetry)):
#    poetry[i] = list(poetry[i]) + ["E"]
    
print(len(poetry))
print(poetry[2])

# 把一首詩的句子結合在一行，去掉標點符號
data = []
for i in range(len(poetry)):
    line = "".join(poetry[i])
    line = line.replace("，","").replace("。","").replace("！","").replace("？","").replace("[","").replace("]","")
    line = line.replace("{","").replace("}","")
    data.append(line)
print(data[2])

2000
['移步出詞林，停輿欣武宴。', '琱弓寫明月，駿馬疑流電。', '驚雁落虛弦，啼猿悲急箭。', '閱賞誠多美，於茲乃忘倦。']
移步出詞林停輿欣武宴琱弓寫明月駿馬疑流電驚雁落虛弦啼猿悲急箭閱賞誠多美於茲乃忘倦


In [4]:
# 將所有出現過的字建立字典，每個字元與標點符號都有對應數字
chars = []
for i in range(len(data)):
    chars_temp = sorted(list(set(data[i])))
    chars.extend(chars_temp)
chars = set(chars)
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))
print(char_indices['無'])

total chars: 4281
3915


In [5]:
# 前五個字預測第六個字，每次前進兩個字
maxlen = 5
step = 2
sentences = []
next_chars = []
for lines in range(len(data)):
    for i in range(0, len(data[lines]) - maxlen, step):
        sentences.append(data[lines][i: i + maxlen])
        next_chars.append(data[lines][i + maxlen])
print('nb sequences:', len(sentences))
print(sentences[40])
print(next_chars[40])

nb sequences: 45572
武宴琱弓寫
明


In [6]:
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


In [7]:
# build the model: a single LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars), activation='softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

Build model...


In [8]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, _):
    # Function invoked at end of each epoch. Prints generated text.
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)
        count = 0
        
        # sentence = 隨機選一段長度為5的句子
        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(35):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            
            if count%10 == 0:
                sys.stdout.write("，")
            elif count%5 == 0:
                sys.stdout.write("。")
            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
            count += 1
        sys.stdout.write("。")
        print()

In [9]:
text = ''.join(data)

In [52]:
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(x, y,
          batch_size=128,
          epochs=30,
          callbacks=[print_callback])

Epoch 1/30

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "新賭得五陵"
新賭得五陵，日一年腸斷。斜隴頭笑年，不隴頭遠戰。盡不相通中，一人一生死。生長安憂一，來萬里誰莫。
----- diversity: 0.5
----- Generating with seed: "新賭得五陵"
新賭得五陵，日一年腸斷。斜夕初昔更，是初氣今日。都家火應須，入醉燒子百。過側玉宮門，門隨金無靜。
----- diversity: 1.0
----- Generating with seed: "新賭得五陵"
新賭得五陵，萋一堤豈鬯。舞贈殊鮮滋，謝臨了則帳。蜀繫報逢河，闕驅種西孤。開入叢少持，曾環渭表央。
----- diversity: 1.2
----- Generating with seed: "新賭得五陵"
新賭得五陵，推之屈流臺。屋白渚鄰愛，尋天在下秩。尺杖京罄危，靜功冬嗟歲。身雷悔嫌纓，土桃英昏尋。
Epoch 2/30

----- Generating text after Epoch: 1
----- diversity: 0.2
----- Generating with seed: "顏豔名都綠"
顏豔名都綠，條城素寶無。爲無寶華香，聞死上宿江。南南望平日，日不此不見。人如知何名，上不歸見有。
----- diversity: 0.5
----- Generating with seed: "顏豔名都綠"
顏豔名都綠，條口素王度。驅道火沙來，飲馬出門門。不盡聞金石，枕清暗去行。浪水山願作，行行時是屋。
----- diversity: 1.0
----- Generating with seed: "顏豔名都綠"
顏豔名都綠，雲榮故飜持。尋遍齊直鐵，痕古戍顧幾。浩徒意恩實，窈宣噴蓮口。由鴦各倫凱，帝入父三光。
----- diversity: 1.2
----- Generating with seed: "顏豔名都綠"
顏豔名都綠，條北雖收笛。至南欲得眺，自恩變溢勤。胡萬了矣余，得即斜滴侍。畢點關長淮，好湖不嗁住。
Epoch 3/30

----- Generating text after Epoch: 2
-----

泰山之下婦，直掌光鬼承。誠片至臨遵，月京擣備此。鱗神酣單煌，近取旌浮州。敢鴻旦圖充，十憑冬貽披。
Epoch 16/30

----- Generating text after Epoch: 15
----- diversity: 0.2
----- Generating with seed: "到市橋步野"
到市橋步野，相見相如稀。曲遊樂不能，新未央宮入。愁願因中雲，今樂敢歡宴。芳懷白露高，舊樓神其止。
----- diversity: 0.5
----- Generating with seed: "到市橋步野"
到市橋步野，消衛去遊宮。柏中前然弦，無生人一軍。杯且胡爲行，國陰軍起太。虜星風生得，舞庭霜前安。
----- diversity: 1.0
----- Generating with seed: "到市橋步野"
到市橋步野，能咸比惟憐。破驚啼有但，殺臺哉鬯穹。軍反別綵咽，泣虔恤誠歡。沒翁願船霑，路爲根殊蘭。
----- diversity: 1.2
----- Generating with seed: "到市橋步野"
到市橋步野，穿鞭蓋君波。形廣羌終雲，躬養羌松魂。龍降窗嚬奏，寒總簾化烈。嬌喧測朔縈，四化躬鶯鴛。
Epoch 17/30

----- Generating text after Epoch: 16
----- diversity: 0.2
----- Generating with seed: "穴戍衣宵寢"
穴戍衣宵寢，二聲食符獻。一冠荷中方，被月寒明光。戈動秋時悲，花爲人不別。不得意妾相，思君不忘王。
----- diversity: 0.5
----- Generating with seed: "穴戍衣宵寢"
穴戍衣宵寢，二八食老聲。樂文敢賤在，空望裏裏如。在何處與妾，桑流衣應明。德上雍熙兮，將兄可見歸。
----- diversity: 1.0
----- Generating with seed: "穴戍衣宵寢"
穴戍衣宵寢，二征瞿樹儀。冠干飛木言，鄰意意少賤。不相延先房，宇詎寵放規。離暇去應何，疑用旄殷窺。
----- diversity: 1.2
----- Generating with seed: "穴戍衣宵寢"
穴戍衣宵寢，二都醉州窟。關山夏順隔，憂劇自疎風。屬戈急臣露，簾花紀倡家。婦養承富帝，子深惠君不。
E

千重碎迎風，一荷事小鉤。舟芙蓉長冷，芳搖漢八點。四二吳芳時，拂姑袖森無。蠁指應曾神，覆徵皎鱗風。
----- diversity: 1.2
----- Generating with seed: "千重碎迎風"
千重碎迎風，一香殿史因。豪懷麗烈裴，肉旅勝蕃銀。則垂辰分高，永疊鏘村至。存徹夜斾空，虜裝洽強澹。


<keras.callbacks.History at 0x7f6ffcd7cb00>

In [65]:
# 根據一個字產生詩
def predict_first(char, diversity=1):
        
        # sentence = 隨機選一段長度為4的句子放在前面
        start_index = random.randint(0, len(text) - maxlen - 1)
        generated = ''
        result = ''
        count = 1
        sentence = text[start_index: start_index + maxlen - 1]
        sentence += char
        generated += sentence
        print('開始文字：', char)
        result += char
        
        for i in range(1,40):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            
            if count%10 == 0:
                result += "。\n"
            elif count%5 == 0:
                result += "，"
            generated += next_char
            sentence = sentence[1:] + next_char

            result += next_char
            count += 1
        result += "。"
        
        return result

In [66]:
#藏頭詩
def predict_hide(line, diversity=1):
    
        # sentence = 隨機選一段長度為4的句子放在前面
        start_index = random.randint(0, len(text) - maxlen - 1)
        generated = ''
        result = ''
        count = 1
        sentence = text[start_index: start_index + maxlen - 1]
        sentence += line[0]
        generated += sentence
        print('藏頭句子：', line)
        result += line[0]
        
        for i in range(1,40):
            if count%10 == 0:
                result += "。\n"
            elif count%5 == 0:
                result += "，"
            
            # 加入藏頭的文字
            if count%10 == 0:
                generated += line[count//10]
                sentence = sentence[1:] + line[count//10]
                result += line[count//10]
                count += 1
                continue
                
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            result += next_char
            count += 1
        result += "。"
        
        return result

In [67]:
# 根據字首產生五言律詩
sen = predict_first('風')
print(sen)

開始文字： 風
風且送和耀，潔芙霞期行。
絳候有網揚，巧綠腰衛惜。
迎贈宮木濤，資血七凝颯。
均碣霜鬢酒，罄激楹豫當。


In [68]:
#藏頭詩，可藏四個字
sen = predict_hide('山窮水盡')
print(sen)

藏頭句子： 山窮水盡
山窗闈留滴，厥常罷棄商。
窮羽邁河賤，獻聊立況郎。
水馨北鳳早，分苔悲爭麗。
盡景運魚裝，兒泰直守三。
