<h1 align="center">LSTM 唐詩生成 Trained by 全唐詩</h1>
<hr>
<p><a href="全唐詩.txt">全唐詩.txt</a></p>
<hr>
<pre>
每次輸入一個 word vector 的數值（input_size=32）
連續輸入五個 word vectors（seq_len=5）
預測下一個 word vector（output_size=32）
</pre>

<hr>
<h3>載入所有 Word Vectors</h3>

In [None]:
# Load all word vectors (word_vec is indexed by character further down).

import itertools
import pickle

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open a word_vec.pkl that comes from a trusted source.
# The `with` statement closes the file on exit, so no explicit close() is
# needed afterwards.
with open('word_vec.pkl', 'rb') as fp:
    word_vec = pickle.load(fp)

# Preview the first ten entries.
for c in itertools.islice(word_vec, 10):
    print(c, word_vec[c])


<hr>
<h3>載入全唐詩文本</h3>
<p style="color:red">全唐詩（單字）.txt</p>

In [None]:
# Load the Complete Tang Poems text.

# The `with` statement closes the file on exit; no explicit close() needed.
with open('全唐詩（單字）.txt', 'r', encoding='utf-8') as fp:
    txt = fp.read()

# Number of characters in the text, not counting ASCII spaces.
n = len(txt) - txt.count(' ')

print('總字數：', n)


<hr>
<h3>產生 Corpus of Word Vectors</h3>
<pre>
total sequence length = 2,563,538
input_tensor shape = (batch, seq_len, input_size)
batch = 1024（先前設定為 128）
seq_len = 5
input_size = 32 (dim of input feature vector)
</pre>

In [None]:
# Build the corpus: one word vector per non-space character of the text.

import numpy as np

# Take the vector width from the loaded vectors themselves.  `input_size` is
# only assigned in a later cell, so referencing it here fails with NameError
# when the notebook is run top to bottom.
# NOTE(review): assumes every entry of word_vec has the same length - confirm.
embed_dim = len(next(iter(word_vec.values())))

corpus = np.zeros([n, embed_dim], dtype=np.float32)

# Row k of the corpus is the vector of the k-th non-space character.
for row, c in enumerate(ch for ch in txt if ch != ' '):
    corpus[row] = word_vec[c]

print(corpus.shape)


In [None]:
# Spot-check one random corpus row.  The upper bound of randint is exclusive,
# so passing n (not n-1) makes every row index 0..n-1 selectable and also
# works when n == 1.
idx = np.random.randint(0, n)
print(idx)
print(corpus[idx])

<hr>
<h3>LSTM Parameters</h3>

In [None]:
# LSTM hyper-parameters

# Data layout
seq_len = 5         # consecutive vectors fed in before a prediction is made
input_size = 32     # features per input vector
output_size = 32    # values per predicted vector

# Network size
hidden_size = 1024  # hidden units per LSTM layer
num_layers = 3      # stacked LSTM layers

# Training
batch = 1024        # samples per batch (128 was used previously)


<hr>
<h3>先準備好第一組 batch 訓練樣本（PyTorch Tensor）</h3>

In [None]:
# Prepare the first training batch as PyTorch tensors.

import numpy as np
import torch

# Slicing silently truncates, so check up front that a full batch of
# windows (plus their targets) fits inside the corpus.
if len(corpus) < batch + seq_len:
    raise ValueError(
        'corpus has %d rows; need at least %d for one batch'
        % (len(corpus), batch + seq_len)
    )

# x[i, j] = corpus[i + j]   (window i, time step j)
# y[i]    = corpus[i + seq_len]   (the vector that follows window i)
# Built from seq_len shifted slices instead of batch*seq_len element copies.
x = torch.from_numpy(
    np.stack([corpus[j:j + batch] for j in range(seq_len)], axis=1)
)
# Copy so that y does not share memory with corpus.
y = torch.from_numpy(corpus[seq_len:seq_len + batch].copy())

print(x.shape)
print(y.shape)


<hr>
<h3>定義與建立 LSTM 模型</h3>
<p style="color:red">batch_first=True</p>

In [None]:
# 定義與建立 LSTM 模型

import torch
import torch.nn as nn

class MyLSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied at every time step.

    Besides ``input_size``, construction reads the module-level globals
    ``hidden_size``, ``num_layers``, ``output_size`` and ``batch``.

    Args:
        input_size: Number of features in each input vector.
    """

    def __init__(self, input_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, output_size)
        )
        # Fixed random initial hidden/cell states, one per batch slot.
        # Registered as buffers so they follow the module through
        # .to(device) / .double(); persistent=False keeps them out of
        # state_dict, so checkpoints keep exactly the same keys as before.
        # NOTE(review): since they are not saved, every newly constructed
        # model draws fresh random initial states.
        self.register_buffer(
            'h0', torch.randn(num_layers, batch, hidden_size),
            persistent=False,
        )
        self.register_buffer(
            'c0', torch.randn(num_layers, batch, hidden_size),
            persistent=False,
        )

    def forward(self, x):
        """Run the LSTM from (h0, c0) and project every time step.

        Args:
            x: Batch-first input (the LSTM is built with batch_first=True).
                Its batch dimension has to match the ``batch`` global used
                at construction, because h0/c0 are sized with it.

        Returns:
            The linear layer's output for every time step of the sequence.
        """
        h_out, _ = self.lstm(x, (self.h0, self.c0))
        return self.fc(h_out)

# Instantiate the model with the configured input width.
lstm = MyLSTM(input_size)

# Show the module structure.
print(lstm)

# params = list(lstm.parameters())
# print(params)


<hr>
<h3>單一 batch 樣本 LSTM Training</h3>

In [None]:
# Train the LSTM repeatedly on the single batch held in x / y.

# Mean-squared-error objective, optimized with Adam.
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.001)

EPOCH = 100

for epoch in range(EPOCH):
    optimizer.zero_grad()
    # Reorder the samples inside the batch.
    order = torch.randperm(x.size(0))
    xi, yi = x[order], y[order]
    # Forward pass; only the prediction at the last time step is scored.
    output = lstm(xi)
    loss = loss_func(output[:, -1, :], yi)
    # Report the loss of every epoch.
    print('epoch = %5d, loss = %16.12f' % (epoch, loss.item()))
    # Backward pass, then parameter update.
    loss.backward()
    optimizer.step()

torch.save(lstm.state_dict(), 'lstm_poems.model')


<hr>
<h3>整個文本逐 batch 樣本 LSTM Training</h3>

In [None]:
# Train the LSTM over the whole corpus, one batch of sliding windows at a time.

# Loss function
loss_func = nn.MSELoss()
# Optimizer
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.001)

EPOCH = 100

# Without at least one full batch the inner loop never runs and the epoch
# average below would divide by zero; fail early with a clear message.
if n < batch + seq_len:
    raise ValueError(
        'corpus has %d rows; need at least %d for one batch'
        % (n, batch + seq_len)
    )

# Tensor view of the corpus (shares memory with the NumPy array; it is only
# read from below).
corpus_t = torch.from_numpy(corpus)

for epoch in range(EPOCH):
    loss_total = 0
    bn = 0
    for base in range(0, n - batch + 1 - seq_len, batch):
        # x[i, j] = corpus[base + i + j], y[i] = corpus[base + i + seq_len].
        # seq_len shifted slices replace batch*seq_len element-wise copies.
        x = torch.stack(
            [corpus_t[base + j:base + j + batch] for j in range(seq_len)],
            dim=1,
        )
        y = corpus_t[base + seq_len:base + seq_len + batch]
        optimizer.zero_grad()
        # Shuffle the samples within the batch.
        permute = torch.randperm(x.size()[0])
        xi = x[permute]
        yi = y[permute]
        # Forward pass
        output = lstm(xi)
        # Score only the prediction at the last time step.
        loss = loss_func(output[:, -1, :], yi)
        loss_total = loss_total + loss.item()
        bn = bn + 1
        # Backward pass
        loss.backward()
        # Update parameters
        optimizer.step()
        # Progress report every 10 batches
        if bn % 10 == 0:
            print('base = %8d, loss = %16.12f' % (base, loss.item()))
    # Mean batch loss of this epoch
    loss = loss_total / bn
    print('epoch = %5d, loss = %16.12f' % (epoch, loss))

torch.save(lstm.state_dict(), 'lstm_poems.model')


<hr>
<h3 style="color:orange">LSTM 全唐詩文本訓練（整合版，含 GPU 加速）</h3>

In [None]:
# LSTM training on the Complete Tang Poems text (integrated version)

# LSTM hyper-parameters

input_size = 32 # features per input vector
batch = 1024 # 128 # samples per training batch
seq_len = 5 # consecutive vectors fed in before a prediction is made
hidden_size = 1024 # hidden units per LSTM layer
num_layers = 3 # stacked LSTM layers
output_size = 32 # values per predicted vector

# Load all word vectors

import pickle

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open a word_vec.pkl that comes from a trusted source.
# The `with` statement closes the file on exit; no explicit close() needed.
with open('word_vec.pkl', 'rb') as fp:
    word_vec = pickle.load(fp)

# Load the Complete Tang Poems text

with open('全唐詩（單字）.txt', 'r', encoding='utf-8') as fp:
    txt = fp.read()

# Number of characters in the text, not counting ASCII spaces.
n = len(txt) - txt.count(' ')
print('總字數：', n)

# Build the corpus: one word vector per non-space character

import numpy as np

corpus = np.zeros([n, input_size], dtype=np.float32)

for row, c in enumerate(ch for ch in txt if ch != ' '):
    corpus[row] = word_vec[c]

# print(corpus.shape)

# GPU detection: use the first CUDA device when available, else the CPU

import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(device)

# 定義與建立 LSTM 模型

import torch
import torch.nn as nn

class MyLSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied at every time step.

    Besides ``input_size``, construction reads the module-level globals
    ``hidden_size``, ``num_layers``, ``output_size`` and ``batch``.

    Args:
        input_size: Number of features in each input vector.
    """

    def __init__(self, input_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, output_size)
        )
        # Fixed random initial hidden/cell states, one per batch slot.
        # Registered as buffers so that moving the module with .to(device)
        # moves them too; the constructor no longer needs the global
        # `device`.  persistent=False keeps them out of state_dict, so
        # checkpoints keep exactly the same keys as before.
        # NOTE(review): since they are not saved, every newly constructed
        # model draws fresh random initial states.
        self.register_buffer(
            'h0', torch.randn(num_layers, batch, hidden_size),
            persistent=False,
        )
        self.register_buffer(
            'c0', torch.randn(num_layers, batch, hidden_size),
            persistent=False,
        )

    def forward(self, x):
        """Run the LSTM from (h0, c0) and project every time step.

        Args:
            x: Batch-first input (the LSTM is built with batch_first=True),
                on the same device as the module.  Its batch dimension has
                to match the ``batch`` global used at construction, because
                h0/c0 are sized with it.

        Returns:
            The linear layer's output for every time step of the sequence.
        """
        h_out, _ = self.lstm(x, (self.h0, self.c0))
        return self.fc(h_out)

lstm = MyLSTM(input_size).to(device)

# print(lstm)

# Train over the whole corpus, one batch of sliding windows at a time

# Loss function
loss_func = nn.MSELoss()
# Optimizer
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.001)

EPOCH = 1000

# LSTM Training

# Without at least one full batch the inner loop never runs and the epoch
# average below would divide by zero; fail early with a clear message.
if n < batch + seq_len:
    raise ValueError(
        'corpus has %d rows; need at least %d for one batch'
        % (n, batch + seq_len)
    )

# CPU tensor view of the corpus (shares memory with the NumPy array; it is
# only read from below).
corpus_t = torch.from_numpy(corpus)

for epoch in range(EPOCH):
    loss_total = 0
    bn = 0
    for base in range(0, n - batch + 1 - seq_len, batch):
        # x[i, j] = corpus[base + i + j], y[i] = corpus[base + i + seq_len].
        # The batch is assembled on the CPU from seq_len shifted slices and
        # sent to the device in one transfer, instead of batch*seq_len
        # separate element-wise copies.
        x = torch.stack(
            [corpus_t[base + j:base + j + batch] for j in range(seq_len)],
            dim=1,
        )
        y = corpus_t[base + seq_len:base + seq_len + batch]
        optimizer.zero_grad()
        # Shuffle the samples within the batch, then move them to the device.
        permute = torch.randperm(x.size()[0])
        xi = x[permute].to(device)
        yi = y[permute].to(device)
        # Forward pass
        output = lstm(xi)
        # Score only the prediction at the last time step.
        loss = loss_func(output[:, -1, :], yi)
        loss_total = loss_total + loss.item()
        bn = bn + 1
        # Backward pass
        loss.backward()
        # Update parameters
        optimizer.step()
        # Progress report every 10 batches
        if bn % 10 == 0:
            print('base = %8d, loss = %16.12f' % (base, loss.item()))
    # Checkpoint every 100 epochs (including epoch 0)
    if epoch % 100 == 0:
        torch.save(lstm.state_dict(), 'lstm_poems_%05d.model' % (epoch))
    # Mean batch loss of this epoch
    loss = loss_total / bn
    print('epoch = %5d, loss = %16.12f' % (epoch, loss))

torch.save(lstm.state_dict(), 'lstm_poems.model')


<hr>
<h3>測試（第 04 組參數）</h3>

In [None]:
# Testing

# LSTM hyper-parameters

# Data layout
seq_len = 5         # consecutive vectors fed in before a prediction is made
input_size = 32     # features per input vector
output_size = 32    # values per predicted vector

# Network size
hidden_size = 1024  # hidden units per LSTM layer (2048 and 256 tried before)
num_layers = 4      # stacked LSTM layers

# Batch
batch = 128         # samples per batch (256 and 1024 tried before)

# 定義 LSTM

import torch
import torch.nn as nn

class MyLSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied at every time step.

    Besides ``input_size``, construction reads the module-level globals
    ``hidden_size``, ``num_layers``, ``output_size`` and ``batch``.

    Args:
        input_size: Number of features in each input vector.
    """

    def __init__(self, input_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, output_size)
        )
        # Fixed random initial hidden/cell states, one per batch slot.
        # Registered as buffers so they follow the module through
        # .to(device) / .double(); persistent=False keeps them out of
        # state_dict, so existing checkpoints still load with the same keys.
        # NOTE(review): since they are not saved, a model built here for
        # testing starts from different random states than the one that
        # was trained.
        self.register_buffer(
            'h0', torch.randn(num_layers, batch, hidden_size),
            persistent=False,
        )
        self.register_buffer(
            'c0', torch.randn(num_layers, batch, hidden_size),
            persistent=False,
        )

    def forward(self, x):
        """Run the LSTM from (h0, c0) and project every time step.

        Args:
            x: Batch-first input (the LSTM is built with batch_first=True).
                Its batch dimension has to match the ``batch`` global used
                at construction, because h0/c0 are sized with it.

        Returns:
            The linear layer's output for every time step of the sequence.
        """
        h_out, _ = self.lstm(x, (self.h0, self.c0))
        return self.fc(h_out)

# Build the network, then restore its weights from a saved checkpoint.
lstm = MyLSTM(input_size)

# Load the LSTM weights

mfile = 'lstm_poems_00300.model'
# The stored tensors are mapped onto the CPU while loading.
state = torch.load(mfile, map_location='cpu')
lstm.load_state_dict(state)
# Switch the module to evaluation mode.
lstm.eval()
print('Load previous nn model completely!')


<hr>
<h3>載入所有 Word Vectors</h3>

In [None]:
# Load all word vectors

import pickle

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open a word_vec.pkl that comes from a trusted source.
# The `with` statement closes the file on exit; no explicit close() needed.
with open('word_vec.pkl', 'rb') as fp:
    word_vec = pickle.load(fp)


<hr>
<h3>載入 word2vec model (char2vec)</h3>

In [None]:
# Load the word2vec model (char2vec); it is used below to map a predicted
# vector back to its most similar entry.

import gensim

# Print the installed gensim version.
print(gensim.__version__)

model = gensim.models.Word2Vec.load('poems.model.bin')


<hr>
<h3>提示靈感啟動</h3>

In [None]:
# Seed the generator with a prompt line.

s = '疑是地上霜'

# The prompt has to fill the input window exactly: one character per step.
if len(s) != seq_len:
    raise ValueError(
        'prompt must contain exactly %d characters, got %d'
        % (seq_len, len(s))
    )

x = torch.zeros([batch, seq_len, input_size])

# Put the vector of the pos-th prompt character at time step pos of every
# batch row.  (Previously the position counter was advanced outside the
# character loop, so every character overwrote step 0 and steps 1.. stayed
# zero.)
for pos, c in enumerate(s):
    x[:, pos] = torch.from_numpy(word_vec[c])

print(x.shape)

# Predict the next vector; keep only the last time step, detached from the
# autograd graph.
output = lstm(x)
output = output[:, -1, :].detach()
print(output.shape)
# Only batch row 0 is decoded.
v = output[0].numpy()
print(v)

# Map the predicted vector back to its single most similar entry.
p = model.wv.similar_by_vector(v, topn=1)
print(p[0][0])


<hr>
<h3>五言絕句</h3>

In [None]:
# Five-character quatrain: emit 20 characters, five per printed line.

for step in range(20):
    # Slide every window one step to the left, then place the most recent
    # prediction in the last slot.
    x[:, :-1] = x[:, 1:].clone()
    x[:, -1] = output
    # Predict the next vector from the updated window.
    output = lstm(x)[:, -1, :].detach()
    # print(output.shape)
    # Decode batch row 0 into its single most similar entry.
    v = output[0].numpy()
    p = model.wv.similar_by_vector(v, topn=1)
    print(p[0][0], end='')
    # Line break after every fifth character.
    if step % 5 == 4:
        print()
