
# 一、实验目的
1. 理解和掌握循环神经网络概念及在深度学习框架中的实现。
2. 掌握使用深度学习框架进行文本生成任务的基本流程：如数据读取、构造网络、训练和预测等。

# 二、实验要求
1. 基于 Python 语言和任意一种深度学习框架（实验指导书中使用 PyTorch 框架进行介绍），完成数据读取、网络设计、网络构建、模型训练和模型测试等过程，最终实现一个可以自动写诗的程序。网络结构设计要有自己的方案，不能与实验指导书完全相同。
2. 随意给出首句，如给定“湖光秋月两相和”，输出模型续写的诗句。也可以根据自己的兴趣，进一步实现写藏头诗（不做要求）。要求输出的诗句尽可能地满足汉语语法和表达习惯。实验提供预处理后的唐诗数据集，包含 57580 首唐诗（在课程网站下载），也可以使用其他唐诗数据集。
3. 按规定时间在课程网站提交实验报告、代码以及 PPT。


# 三、实验原理


In [13]:
import numpy as np
import torch
import torch.utils
import torch.utils.data

In [14]:

# prepare
'''
    data_loader  数据加载器
    ix2word      序号到词的映射
    word2ix      词到序号的映射
'''

dataset = np.load('./data/tang.npz', allow_pickle=True)
data = dataset['data']
ix2word = dataset['ix2word'].item()
word2ix = dataset['word2ix'].item()
data = torch.from_numpy(data)
data_loader = torch.utils.data.DataLoader(
    data,
    batch_size=16,
    shuffle=True,
    num_workers=0,
)

In [15]:
import torch.nn as nn
# class Poetry(nn.Module):
#     def __init__(self, vocab_size, embedding_dim, hidden_dim):
#         super(Poetry, self).__init__()
#         self.hidden_dim = hidden_dim
#         self.embedding = nn.Embedding(vocab_size, embedding_dim)
#         self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=2, batch_first=True)
#         self.fc = nn.Linear(hidden_dim, vocab_size)

#     def forward(self, input, hidden=None):

#         batch_size, seq_len = input.size()

#         if hidden is None:
#             h_0 = input.data.new(2, batch_size, self.hidden_dim).fill_(0).float()
#             c_0 = input.data.new(2, batch_size, self.hidden_dim).fill_(0).float()
#         else:
#             h_0, c_0 = hidden

#         embeds = self.embedding(input)
#         output, hidden = self.lstm(embeds, (h_0, c_0))
#         output = self.fc(output)
#         output = output.reshape(batch_size * seq_len, -1)
#         return output, hidden

class Poetry(nn.Module):
    """Token-level language model: embedding -> stacked LSTM -> linear projection.

    Dropout is applied to the embeddings, between LSTM layers, and to the
    LSTM output before the final projection.

    Args:
        vocab_size: Number of distinct tokens (rows of the embedding table and
            width of the output logits).
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
        dropout: Dropout probability used in all three places listed above.
        pretrained_embeddings: Optional tensor that replaces the embedding
            weights; when given, the embedding is frozen (no gradient).
            Its shape is not validated here; it is expected to be compatible
            with ``(vocab_size, embedding_dim)``.
        num_layers: Number of stacked LSTM layers (defaults to 2, the value
            that used to be hard-coded).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, dropout=0.5,
                 pretrained_embeddings=None, num_layers=2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        if pretrained_embeddings is not None:
            # Swap in the supplied table and freeze it.
            self.embedding.weight = nn.Parameter(pretrained_embeddings, requires_grad=False)

        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            # Inter-layer dropout is meaningless (and warns) with one layer.
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_dim, vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden=None):
        """Run the model on a batch of token indices.

        Args:
            input: Integer tensor of shape ``(batch_size, seq_len)``.
            hidden: Optional ``(h_0, c_0)`` tuple carried over from a previous
                call; when ``None`` a zero state is used.

        Returns:
            ``(output, hidden)`` where ``output`` holds the logits reshaped to
            ``(batch_size * seq_len, vocab_size)`` and ``hidden`` is the
            ``(h_n, c_n)`` tuple returned by the LSTM.
        """
        batch_size, seq_len = input.size()
        embeds = self.dropout(self.embedding(input))

        if hidden is None:
            # new_zeros inherits dtype and device from the embeddings, so the
            # state always matches the tensors the LSTM will consume.
            state_shape = (self.num_layers, batch_size, self.hidden_dim)
            h_0 = embeds.new_zeros(state_shape)
            c_0 = embeds.new_zeros(state_shape)
        else:
            h_0, c_0 = hidden

        output, hidden = self.lstm(embeds, (h_0, c_0))
        output = self.fc(self.dropout(output))
        # Flatten batch and time so the result lines up with a flattened target.
        output = output.reshape(batch_size * seq_len, -1)
        return output, hidden

In [16]:
from HyperPara import paraList
# embedding_dim, hidden_dim and lr are all read from paraList.
model = Poetry(len(word2ix), embedding_dim=paraList.embedding_dim, hidden_dim=paraList.hidden_dim)

# Loss over the vocabulary logits, and the optimizer over every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=paraList.lr)

# Module-level accumulator; train() below keeps its own local of the same name.
loss_meter = 0


from tqdm import tqdm
def train(model, data_loader, optimizer, criterion):
    """Train ``model`` for one pass over ``data_loader`` with a progress bar.

    Each batch is split into an input (all tokens but the last) and a target
    (all tokens but the first), so the model learns next-token prediction.

    Args:
        model: Module called as ``model(input)`` that returns
            ``(logits, hidden)`` with logits flattened over batch and time.
        data_loader: Sized iterable yielding 2-D batches of token indices.
        optimizer: Optimizer over ``model``'s parameters.
        criterion: Loss called as ``criterion(logits, flat_target)``.

    Returns:
        The mean batch loss over the pass as a float (``0.0`` when the loader
        yields no batches).
    """
    model.train()
    loss_meter = 0.0
    num_batches = len(data_loader)

    # Keep batches on the same device as the model (no-op for a CPU model).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    loop = tqdm(data_loader, total=num_batches)

    for i, data in enumerate(loop):
        optimizer.zero_grad()
        data = data.long()
        if device is not None:
            data = data.to(device)
        # Shift by one position: predict token t+1 from tokens up to t.
        input, target = data[:, :-1], data[:, 1:]
        output, _ = model(input)
        loss = criterion(output, target.reshape(-1))
        loss.backward()
        optimizer.step()
        loss_meter += loss.item()
        # The bar shows the batch counter and the running mean loss.
        loop.set_description(f'Training Epoch [{i+1}/{num_batches}]')
        loop.set_postfix(loss=loss_meter/(i+1))

    return loss_meter / num_batches if num_batches else 0.0


def generate(model, start_words, ix2words, word2ix, max_length=100):
    """Greedily continue a poem that begins with ``start_words``.

    The model is first fed ``'<START>'`` followed by the given opening
    characters (its predictions are ignored during that phase), then at each
    step the highest-scoring token is appended and fed back in. Generation
    stops at ``'<EOP>'`` (which is not included in the result) or after
    ``max_length`` model steps.

    Side effect: the model is switched to evaluation mode and left there.

    Args:
        model: Module called as ``model(input, hidden)`` with a ``(1, 1)``
            index tensor, returning ``(logits, hidden)``.
        start_words: Iterable of opening tokens (typically a string).
        ix2words: Mapping from token index to token.
        word2ix: Mapping from token to index; must contain ``'<START>'`` and
            every token of ``start_words``.
        max_length: Maximum number of model steps, opening tokens included.

    Returns:
        The opening text plus the generated continuation, joined into a string.

    Raises:
        KeyError: If ``'<START>'`` or an opening token is missing from ``word2ix``.
    """
    result = list(start_words)
    start_words_len = len(result)

    # Build inputs on the model's device so a GPU-resident model also works.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    def as_step_input(index):
        return torch.tensor([[index]], dtype=torch.long, device=device)

    step_input = as_step_input(word2ix['<START>'])
    hidden = None
    model.eval()  # disable dropout so generation is deterministic

    with torch.no_grad():
        for i in range(max_length):
            output, hidden = model(step_input, hidden)
            if i < start_words_len:
                # Still consuming the user-supplied prefix: force the next token.
                w = result[i]
                step_input = as_step_input(word2ix[w])
            else:
                top_index = output.topk(1)[1].item()
                w = ix2words[top_index]
                result.append(w)
                step_input = as_step_input(top_index)
            if w == '<EOP>':
                # Drop the end-of-poem marker from the returned text.
                del result[-1]
                break

    return ''.join(result)

In [17]:
# Train for a fixed number of passes; after each pass print a sample
# continuation of a fixed opening line to eyeball progress.
EPOCH = 5
for epoch in range(EPOCH):
    print(f'====> Epoch: {epoch + 1}')
    train(model, data_loader, optimizer, criterion)
    # Emit the label first so it is visible while the sample is generated.
    print('====> Say: ', end=' ')
    print(generate(model, '苟利国家生死以', ix2word, word2ix))

====> Epoch: 1


Training Epoch [3599/3599]: 100%|██████████| 3599/3599 [19:56<00:00,  3.01it/s, loss=2.63]


====> Say:  苟利国家生死以，不知不得不可知。
====> Epoch: 2


Training Epoch [3599/3599]: 100%|██████████| 3599/3599 [20:31<00:00,  2.92it/s, loss=2.44]


====> Say:  苟利国家生死以，不知不得无人情。不知不得无人事，不见金衣不可知。不知不得无人事，不见金衣不可知。
====> Epoch: 3


Training Epoch [3599/3599]: 100%|██████████| 3599/3599 [20:22<00:00,  2.94it/s, loss=2.35]


====> Say:  苟利国家生死以，不知天子不能知。一时不得无人事，不得无人不得知。不得不知无事事，不知何处不知君。
====> Epoch: 4


Training Epoch [3599/3599]: 100%|██████████| 3599/3599 [18:09<00:00,  3.30it/s, loss=2.31]


====> Say:  苟利国家生死以，不知何处不能知。不知此事无人事，不得无人不得知。不得不知无事事，不知何处是君心。
====> Epoch: 5


Training Epoch [3599/3599]: 100%|██████████| 3599/3599 [3:02:09<00:00,  3.04s/it, loss=2.27]      

====> Say:  苟利国家生死以，不知无事不能知。不知不得无人事，不是人间不可知。





In [18]:
def Acrostic_poetry_generator(start_word, ix2word, word2ix, model=None,
                              model_path='./saved_models/model.pth', max_gen_len=None):
    """Generate an acrostic poem whose sentences begin with the characters of ``start_word``.

    At the start of the poem and after every sentence-ending mark, the next
    character of ``start_word`` is forced as the first token of the sentence;
    all other tokens are chosen greedily from the model. Generation ends when
    a sentence finishes after every head character has been used, or after
    ``max_gen_len`` model steps.

    Side effect: the model is switched to evaluation mode and left there.

    Args:
        start_word: The head characters, one per sentence.
        ix2word: Mapping from token index to token.
        word2ix: Mapping from token to index; must contain ``'<START>'`` and
            every head character.
        model: Optional ready-to-use model. When ``None``, a ``Poetry`` model
            is built from ``paraList`` and its weights are loaded from
            ``model_path``.
        model_path: Checkpoint (state dict) to load when ``model`` is ``None``.
        max_gen_len: Maximum number of model steps; defaults to
            ``paraList.max_gen_len``.

    Returns:
        The generated tokens as a list (head characters included).

    Raises:
        KeyError: If ``'<START>'`` or a head character is missing from ``word2ix``.
    """
    if model is None:
        model = Poetry(len(word2ix), embedding_dim=paraList.embedding_dim, hidden_dim=paraList.hidden_dim)
        # Load the weights from the checkpoint file, not from the head text.
        model.load_state_dict(torch.load(model_path, map_location='cpu'))
    if max_gen_len is None:
        max_gen_len = paraList.max_gen_len

    model.eval()  # disable dropout so generation is deterministic

    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    def as_step_input(token):
        return torch.tensor([[word2ix[token]]], dtype=torch.long, device=device)

    # Tokens after which a new sentence (and hence a head character) begins.
    # The full-width '！' is accepted alongside the original ASCII '!'.
    sentence_breaks = {'。', '!', '！', '<START>'}

    result = []
    head_count = len(start_word)
    next_head = 0
    pre_word = '<START>'
    step_input = as_step_input('<START>')
    hidden = None

    with torch.no_grad():
        for _ in range(max_gen_len):
            output, hidden = model(step_input, hidden)

            if pre_word in sentence_breaks:
                if next_head == head_count:
                    # Every head character has opened a finished sentence.
                    break
                # Override the model's prediction with the next head character.
                w = start_word[next_head]
                next_head += 1
            else:
                w = ix2word[output[0].topk(1)[1][0].item()]

            step_input = as_step_input(w)
            result.append(w)
            pre_word = w

    return result



In [19]:
import os
# Persist the trained weights (state dict only) under ./saved_models.
save_dir = './saved_models'
os.makedirs(save_dir, exist_ok=True)  # create the folder if it is missing

model_save_path = os.path.join(save_dir, 'model.pth')
torch.save(model.state_dict(), model_save_path)

print("model saved to {}".format(model_save_path))

model saved to ./saved_models/model.pth


In [20]:
# Show the module tree (layer types and sizes) of the trained model.
print(model)

Poetry(
  (embedding): Embedding(8293, 300)
  (lstm): LSTM(300, 256, num_layers=2, batch_first=True, dropout=0.5)
  (fc): Linear(in_features=256, out_features=8293, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)
