In [1]:
# 数据分析/处理
import numpy as np
import pandas as pd
import re

# 搭建神经网络
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch import optim
from torch.utils.data import Dataset,DataLoader

# 数据可视化
import matplotlib.pyplot as plt
import warnings

# word2vec
from gensim.models import Word2Vec


warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# Detect CUDA support and choose the device that models/tensors should use
cuda_available = torch.cuda.is_available()
device = torch.device("cuda") if cuda_available else torch.device("cpu")
if cuda_available:
    # Report the name and compute capability of GPU index 0
    print("CUDA Device Name:", torch.cuda.get_device_name(0))
    print("CUDA Compute Capability:", torch.cuda.get_device_capability(0))

# Seed NumPy and PyTorch with 42 ("the answer to the universe") for reproducibility
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x228988f0410>

### 读入数据

由于是中文诗词，所以需要进行分词。还要进行数据清洗。

### 生成式语言模型建模

使用LSTM和GRU搭建语言模型。使用预测下一个token的方式生成，这种自回归生成方法是目前最主流的生成方法。

In [None]:
# 定义模型
class GRU(nn.Module):
    """GRU followed by a Linear -> ReLU -> Softmax head applied to the last time step.

    Args:
        d_model: Feature size of each input step (``input_size`` of ``nn.GRU``).
        hidden_size: Hidden state size per direction.
        num_layer: Number of stacked GRU layers.
        output_size: Number of classes produced by the head.
        dropout: Dropout probability between stacked GRU layers.
        bid: If truthy, the GRU is bidirectional.
    """

    def __init__(self,d_model,hidden_size,num_layer,output_size,dropout=0.1,bid=False) -> None:
        super().__init__()
        self.d_model=d_model
        self.hidden_size=hidden_size
        self.layers=num_layer
        self.bid=bid
        # A bidirectional GRU doubles both the number of initial hidden states
        # and the feature size of its per-step output.
        self.num_directions=2 if bid else 1

        # nn.GRU only applies dropout between stacked layers; with a single
        # layer it has no effect and only triggers a warning, so pass 0 there.
        self.rnn=nn.GRU(d_model,hidden_size,num_layer,
                        batch_first=True,
                        dropout=dropout if num_layer>1 else 0.0,
                        bidirectional=bool(bid))
        # NOTE(review): ReLU before Softmax clamps negative scores to 0, and
        # returning probabilities is wrong if the training loss is
        # nn.CrossEntropyLoss (which expects raw logits) - confirm against the
        # training loop. The original head structure is kept here on purpose.
        self.classfier=nn.Sequential(
            nn.Linear(hidden_size*self.num_directions,output_size),
            nn.ReLU(),
            nn.Softmax(dim=-1)  # normalise over the class axis
        )

    def forward(self,X):
        """Run the GRU over ``X`` and classify from the last time step.

        Args:
            X: Batched input; with ``batch_first=True`` the GRU reads it as
                ``(batch, seq_len, d_model)``.

        Returns:
            Tensor of shape ``(batch, output_size)`` whose rows sum to 1.
        """
        # nn.GRU expects h_0 as (num_directions * num_layers, batch, hidden_size)
        # regardless of batch_first; create it on the input's device/dtype so
        # the model also works after being moved to the GPU.
        h0=torch.zeros(self.num_directions*self.layers,X.size(0),self.hidden_size,
                       device=X.device,dtype=X.dtype)
        X,h_n=self.rnn(X,h0)
        out=self.classfier(X[:,-1,:])
        return out


class LSTM(nn.Module):
    """LSTM encoder that returns the full output sequence and the final states.

    No classification head is attached; ``output_size`` is accepted for
    signature parity with ``GRU`` but is not used by this module.

    Args:
        d_model: Feature size of each input step (``input_size`` of ``nn.LSTM``).
        hidden_size: Hidden/cell state size per direction.
        num_layer: Number of stacked LSTM layers.
        output_size: Unused (see above).
        dropout: Dropout probability between stacked LSTM layers.
        bid: If truthy, the LSTM is bidirectional.
    """

    def __init__(self,d_model,hidden_size,num_layer,output_size,dropout=0.1,bid=False) -> None:
        super().__init__()
        self.d_model=d_model
        self.hidden_size=hidden_size
        self.layer=num_layer
        self.bid=bid

        # nn.LSTM only applies dropout between stacked layers; with a single
        # layer it has no effect and only triggers a warning, so pass 0 there.
        self.rnn=nn.LSTM(d_model,hidden_size,num_layer,
                         batch_first=True,
                         dropout=dropout if num_layer>1 else 0.0,
                         bidirectional=bool(bid))

    def forward(self,X):
        """Run the LSTM over ``X`` starting from zero hidden and cell states.

        Args:
            X: Batched input; with ``batch_first=True`` the LSTM reads it as
                ``(batch, seq_len, d_model)``.

        Returns:
            Tuple ``(X, hn, cn)``: the per-step outputs and the final hidden
            and cell states as returned by ``nn.LSTM``.
        """
        num_directions=2 if self.bid else 1
        # Initial states are (num_directions * num_layers, batch, hidden_size).
        # Allocate them on the input's device/dtype so the module keeps working
        # after .to(device) or .double() instead of failing on a CPU/float32
        # mismatch.
        state_shape=(num_directions*self.layer,X.size(0),self.hidden_size)
        h0=torch.zeros(state_shape,device=X.device,dtype=X.dtype)
        c0=torch.zeros(state_shape,device=X.device,dtype=X.dtype)
        X,(hn,cn)=self.rnn(X,(h0,c0))
        return X,hn,cn