In [None]:
#9.1 门控循环单元GRU  

###(1)从零实现
import torch
from torch import nn
from d2l import torch as d2l

# Load the data: mini-batch size and number of time steps are fixed here
batch_size = 32
num_steps = 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

#初始化模型参数
def get_params(vocab_size, num_hiddens, device):
    """Create and initialise the trainable parameters of a from-scratch GRU.

    Args:
        vocab_size: Vocabulary size; used as both the input and output width.
        num_hiddens: Number of hidden units.
        device: Device on which every parameter tensor is allocated.

    Returns:
        A list of 11 tensors with gradient tracking enabled, in the order
        [W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q].
    """
    num_inputs = num_outputs = vocab_size

    def normal(shape):
        # Weight initialiser: draw from a Gaussian and scale to a std of 0.01.
        return torch.randn(size=shape, device=device) * 0.01

    def three():
        # One (input-to-hidden weight, hidden-to-hidden weight, zero bias) triple.
        return (normal((num_inputs, num_hiddens)),
                normal((num_hiddens, num_hiddens)),
                torch.zeros(num_hiddens, device=device))

    W_xz, W_hz, b_z = three()  # update gate parameters
    W_xr, W_hr, b_r = three()  # reset gate parameters
    W_xh, W_hh, b_h = three()  # candidate hidden state parameters
    # Output layer parameters. These must be two separate statements: joining
    # them with a comma turns the line into an invalid chained assignment.
    W_hq = normal((num_hiddens, num_outputs))
    b_q = torch.zeros(num_outputs, device=device)
    # Attach gradients
    params = [W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q]
    for param in params:
        # `requires_grad` is a bool attribute; the in-place setter is `requires_grad_`.
        param.requires_grad_(True)
    return params

#定义模型
#定义隐状态初始化函数
def init_gru_state(batch_size, num_hiddens, device):
    """Build the initial GRU state.

    Returns a one-element tuple holding an all-zero tensor of shape
    (batch_size, num_hiddens) allocated on `device`.
    """
    initial_hidden = torch.zeros((batch_size, num_hiddens), device=device)
    return (initial_hidden,)
#定义GRU模型
def gru(inputs, state, params):
    """GRU forward pass over a sequence.

    Args:
        inputs: Iterated over its first dimension, one slice per step. Each
            slice X is multiplied by the W_x* matrices (assumed to be
            (batch, vocab_size) -- confirm against the caller).
        state: One-element tuple `(H,)` holding the current hidden state.
        params: The 11 parameters in the order
            [W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q].

    Returns:
        A pair `(outputs, (H,))`: the per-step outputs concatenated along
        dim 0, and the final hidden state wrapped in a tuple.
    """
    # The last entry is the output bias b_q, which is the name used below.
    W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    for X in inputs:
        Z = torch.sigmoid((X @ W_xz) + (H @ W_hz) + b_z)  # update gate
        R = torch.sigmoid((X @ W_xr) + (H @ W_hr) + b_r)  # reset gate
        H_tilda = torch.tanh((X @ W_xh) + ((R * H) @ W_hh) + b_h)  # candidate hidden state
        H = Z * H + (1 - Z) * H_tilda  # new hidden state
        Y = H @ W_hq + b_q  # output of the network at this step
        outputs.append(Y)
    return torch.cat(outputs, dim=0), (H,)

# Training and prediction
vocab_size = len(vocab)
num_hiddens = 256
device = d2l.try_gpu()
num_epochs = 500
lr = 1
model = d2l.RNNModelScratch(
    len(vocab), num_hiddens, device,
    get_params, init_gru_state, gru,
)
d2l.train_ch8(model, train_iter, vocab, lr, num_epochs, device)


####(2) Concise implementation using the framework's built-in API
num_inputs = vocab_size
gru_layer = nn.GRU(input_size=num_inputs, hidden_size=num_hiddens)
# Wrap the recurrent layer in the d2l model and move it to the chosen device
model = d2l.RNNModel(gru_layer, len(vocab)).to(device)
d2l.train_ch8(model, train_iter, vocab, lr, num_epochs, device)


In [None]:
##9.2 长短期记忆网络LSTM 

#(1)从0开始实现
import torch
from torch import nn
from d2l import torch as d2l

# Load the data: mini-batch size and number of time steps are fixed here
batch_size = 32
num_steps = 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

#初始化模型参数
def get_lstm_params(vocab_size, num_hiddens, device):
    """Create and initialise the trainable parameters of a from-scratch LSTM.

    Args:
        vocab_size: Vocabulary size; used as both the input and output width.
        num_hiddens: Number of hidden units (a hyperparameter).
        device: Device on which every parameter tensor is allocated.

    Returns:
        A list of 14 tensors with gradient tracking enabled, in the order
        [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
         W_xc, W_hc, b_c, W_hq, b_q].
    """
    num_inputs = num_outputs = vocab_size

    def normal(shape):
        # Weight initialiser: draw from a Gaussian and scale to a std of 0.01.
        return torch.randn(size=shape, device=device) * 0.01

    def three():
        # One (input-to-hidden weight, hidden-to-hidden weight, zero bias) triple.
        return (normal((num_inputs, num_hiddens)),
                normal((num_hiddens, num_hiddens)),
                torch.zeros(num_hiddens, device=device))

    W_xi, W_hi, b_i = three()  # input gate parameters
    W_xf, W_hf, b_f = three()  # forget gate parameters
    W_xo, W_ho, b_o = three()  # output gate parameters
    W_xc, W_hc, b_c = three()  # candidate memory cell parameters
    # Output layer parameters. These must be two separate statements: joining
    # them with a comma turns the line into an invalid chained assignment.
    W_hq = normal((num_hiddens, num_outputs))
    b_q = torch.zeros(num_outputs, device=device)
    # Attach gradients
    params = [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
              W_xc, W_hc, b_c, W_hq, b_q]
    for param in params:
        param.requires_grad_(True)
    return params

#定义模型
#初始化状态函数
def init_lstm_state(batch_size, num_hiddens, device):
    """Build the initial LSTM state.

    Returns a two-element tuple of distinct all-zero tensors, each of shape
    (batch_size, num_hiddens) and allocated on `device`. `lstm` unpacks the
    pair as (H, C).
    """
    state_shape = (batch_size, num_hiddens)
    hidden = torch.zeros(state_shape, device=device)
    memory_cell = torch.zeros(state_shape, device=device)
    return (hidden, memory_cell)

#LSTM模型
def lstm(inputs, state, params):
    """LSTM forward pass over a sequence.

    Args:
        inputs: Iterated over its first dimension, one slice per step. Each
            slice X is multiplied by the W_x* matrices (assumed to be
            (batch, vocab_size) -- confirm against the caller).
        state: Pair `(H, C)` holding the current hidden state and memory cell.
        params: The 14 parameters in the order
            [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
             W_xc, W_hc, b_c, W_hq, b_q].

    Returns:
        A pair `(outputs, (H, C))`: the per-step outputs concatenated along
        dim 0 into one tensor (matching `gru`), and the final state.
    """
    [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
     W_xc, W_hc, b_c, W_hq, b_q] = params
    (H, C) = state
    outputs = []
    for X in inputs:
        I = torch.sigmoid((X @ W_xi) + (H @ W_hi) + b_i)  # input gate
        F = torch.sigmoid((X @ W_xf) + (H @ W_hf) + b_f)  # forget gate
        O = torch.sigmoid((X @ W_xo) + (H @ W_ho) + b_o)  # output gate
        C_t = torch.tanh((X @ W_xc) + (H @ W_hc) + b_c)  # candidate memory cell
        C = F * C + I * C_t  # new memory cell
        H = O * torch.tanh(C)  # new hidden state
        Y = (H @ W_hq) + b_q  # output of the network at this step
        outputs.append(Y)
    # Return a single tensor rather than the raw Python list of step outputs.
    return torch.cat(outputs, dim=0), (H, C)

# Training and prediction
vocab_size = len(vocab)
num_hiddens = 256
device = d2l.try_gpu()
num_epochs = 500
lr = 1
model = d2l.RNNModelScratch(
    len(vocab), num_hiddens, device,
    get_lstm_params, init_lstm_state, lstm,
)
d2l.train_ch8(model, train_iter, vocab, lr, num_epochs, device)


#(2) Concise implementation using the framework's built-in API
num_inputs = vocab_size
lstm_layer = nn.LSTM(input_size=num_inputs, hidden_size=num_hiddens)
# Wrap the recurrent layer in the d2l model and move it to the chosen device
model = d2l.RNNModel(lstm_layer, len(vocab)).to(device)
d2l.train_ch8(model, train_iter, vocab, lr, num_epochs, device)


In [None]:
###9.3 深度循环神经网络 deep_rnn

#(1)简洁实现:Deep-LSTM网络-使用框架API
import torch
from torch import nn
from d2l import torch as d2l

# Load the data
batch_size = 32
num_steps = 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# Instantiate the model
vocab_size = len(vocab)
num_hiddens = 256
num_layers = 2  # number of stacked recurrent layers
num_inputs = vocab_size
device = d2l.try_gpu()
lstm_layer = nn.LSTM(input_size=num_inputs, hidden_size=num_hiddens,
                     num_layers=num_layers)
model = d2l.RNNModel(lstm_layer, len(vocab)).to(device)

# Training and prediction
# Two LSTM layers are instantiated here, so training is considerably slower
num_epochs = 500
lr = 2
d2l.train_ch8(model, train_iter, vocab, lr, num_epochs, device)


In [None]:
###9.4 双向循环神经网络 bi_rnn

import torch 
from torch import nn
from d2l import torch as d2l

# Load the data
batch_size = 32
num_steps = 35
device = d2l.try_gpu()
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)
# Define a bidirectional LSTM by passing bidirectional=True
vocab_size = len(vocab)
num_hiddens = 256
num_layers = 2
num_inputs = vocab_size
lstm_layer = nn.LSTM(input_size=num_inputs, hidden_size=num_hiddens,
                     num_layers=num_layers, bidirectional=True)
model = d2l.RNNModel(lstm_layer, len(vocab)).to(device)
# Train the model
num_epochs = 500
lr = 1
d2l.train_ch8(model, train_iter, vocab, lr, num_epochs, device)


