In [1]:
import torch
from torch import nn

## RNN

In [2]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.w_ih = torch.randn(input_size, hidden_size)
        self.w_hh = torch.randn(hidden_size, hidden_size)
        self.b_ih = torch.randn(1, hidden_size)
        self.b_hh = torch.randn(1, hidden_size)
        
    def forward(self, inputs, h_0):
        L,N,d = inputs.shape # 分别对应序列长度、批量大小和特征维度
        H = h_0[0]  # 因为h_0的形状为（1，N，h），我们需要使用（N,h）去计算
        outputs = torch.zeros(L,N,H.shape[1])
        for t in range(L):
            X_t = inputs[t]
            H = torch.tanh(X_t @ self.w_ih + self.b_ih + H @ self.w_hh + self.b_hh)
            outputs[t] = H
        h_n = outputs[-1].unsqueeze(0)  # h_n实际上就是h_L，但此时的形状为(N,h)
        return outputs,h_n

In [3]:
torch.manual_seed(42)
seq = torch.randn(4,6)  # L,d
inputs = seq.unsqueeze(1)  # batch_first = False  L,N,d
h_0 = torch.randn(1,1,3) # D为单向RNN还是双向RNN，D等于1为单向，等于2为双向，  参数：Rnn层数*D，batch_size，hidden_size

# 保持RNN内部参数：权重和偏置一致
rnn = nn.RNN(6,3)
params = [param.data.T for param in rnn.parameters()]
my_rnn = RNN(6,3)
my_rnn.w_ih = params[0]
my_rnn.w_hh = params[1]
my_rnn.b_ih[0] = params[2]
my_rnn.b_hh[0] = params[3]

outputs, h_n = my_rnn(inputs, h_0)
print(outputs)
print(h_n)

outputs2, h_n2 = rnn(inputs, h_0)
print(outputs2)
print(h_n2)

<torch._C.Generator at 0x1d14e23c270>

tensor([[[-0.5428,  0.9207,  0.7060]],

        [[-0.2245,  0.2461, -0.4578]],

        [[ 0.5950, -0.3390, -0.4598]],

        [[ 0.9281, -0.7660,  0.5954]]])
tensor([[[ 0.9281, -0.7660,  0.5954]]])
tensor([[[-0.5428,  0.9207,  0.7060]],

        [[-0.2245,  0.2461, -0.4578]],

        [[ 0.5950, -0.3390, -0.4598]],

        [[ 0.9281, -0.7660,  0.5954]]], grad_fn=<StackBackward0>)
tensor([[[ 0.9281, -0.7660,  0.5954]]], grad_fn=<StackBackward0>)


  params = [param.data.T for param in rnn.parameters()]


In [1]:
import torch
from torch import nn

In [3]:
# 网络参数
input_size = 10
hidden_size = 20
num_layers = 1

# 数据参数
seq_len = 5
batch_size = 3
data_dim = input_size

# 输入数据
data = torch.randn(seq_len, batch_size, data_dim)

# 官方pytorch中的RNN 
# nn.Conv1d(in_channels,64,kernel_size=7,stride=2,padding=3,bias=False)
ornn = nn.RNN(input_size, hidden_size, num_layers)

# init hidden state
h0 = torch.randn(num_layers,batch_size,hidden_size)

# rnn implemented by myself
class MyRNN():
    def __init__(self):
        # 保证权重和偏差的参数和原始的rnn相同 并将最终的结果和原始rnn进行比较
        self.w_ih = torch.nn.Parameter(ornn.weight_ih_l0.T)  # 乘以当前x
        self.b_ih = torch.nn.Parameter(ornn.bias_ih_l0)  # 附带偏差
        self.w_hh = torch.nn.Parameter(ornn.weight_hh_l0.T)  # 乘以上一时刻h
        self.b_hh = torch.nn.Parameter(ornn.bias_hh_l0)
        self.ht = torch.nn.Parameter(h0)
        self.myoutput = []
        
    def forward(self, x): # shape:(seq_len, batch_size, data_dim)
        for i in range(seq_len): # 这一行是理解RNN的关键
            igates = torch.matmul(x[i], self.w_ih) + self.b_ih
            hgates = torch.matmul(self.ht, self.w_hh) + self.b_hh
            self.ht = torch.tanh(igates + hgates) # 这一行是RNN的公式
            self.myoutput.append(self.ht)
        return self.ht, self.myoutput

myrnn = MyRNN()
myht, myoutput = myrnn.forward(data)
official_output, official_hn = ornn(data,h0)

print ('myht:')
print (myht)
print ('official_hn:')
print (official_hn)

print ("--" * 40)
print ('myoutput:')
print (myoutput)
print ('official_output:')
print (official_output)

myht:
tensor([[[ 0.0427, -0.0507, -0.1051,  0.0558,  0.5496, -0.4055, -0.3267,
          -0.2305,  0.0300,  0.4706,  0.5252,  0.5213, -0.5046,  0.1732,
           0.2074,  0.4352, -0.6096, -0.2494, -0.2184, -0.4104],
         [ 0.0566,  0.4964,  0.1012, -0.4300, -0.1775, -0.0015, -0.5925,
          -0.5441, -0.0066,  0.7015,  0.0342,  0.4849, -0.0690, -0.1957,
          -0.2227, -0.4780,  0.6217, -0.5629,  0.5687,  0.3895],
         [ 0.5041, -0.1209, -0.7399, -0.3278, -0.1692,  0.1554, -0.6658,
          -0.0643,  0.5279,  0.4411, -0.1411,  0.3250, -0.4472, -0.3599,
          -0.6736, -0.3189,  0.5611, -0.0729, -0.7830, -0.3426]]],
       grad_fn=<TanhBackward0>)
official_hn:
tensor([[[ 0.0427, -0.0507, -0.1051,  0.0558,  0.5496, -0.4055, -0.3267,
          -0.2305,  0.0300,  0.4706,  0.5252,  0.5213, -0.5046,  0.1732,
           0.2074,  0.4352, -0.6096, -0.2494, -0.2184, -0.4104],
         [ 0.0566,  0.4964,  0.1012, -0.4300, -0.1775, -0.0015, -0.5925,
          -0.5441, -0.0066,  0

In [1]:
import torch
from torch import nn

In [3]:
# 网络参数
input_size = 10
hidden_size = 20
num_layers = 2

# 数据参数
seq_len = 5
batch_size = 3
data_dim = input_size

data = torch.randn(seq_len, batch_size, data_dim)

# pytorch中原始的RNN
ornn = nn.RNN(input_size, hidden_size, num_layers)
h0 = torch.randn(num_layers, batch_size, hidden_size)

class MyRNN():
    def __init__(self):
        # input_size, hidden_size
        self.w_ih = torch.nn.Parameter(ornn.weight_ih_l0.T)
        self.b_ih = torch.nn.Parameter(ornn.bias_ih_l0)
        self.w_hh = torch.nn.Parameter(ornn.weight_hh_l0.T)
        self.b_hh = torch.nn.Parameter(ornn.bias_hh_l0)
        self.ht = torch.nn.Parameter(h0)
        self.myoutput = []
        if num_layers == 2:
            self.ht = torch.nn.Parameter(h0[0])
            self.ht1 = torch.nn.Parameter(h0[1])
            self.w_ih_l1 = torch.nn.Parameter(ornn.weight_ih_l1.T)
            self.b_ih_l1 = torch.nn.Parameter(ornn.bias_ih_l1)
            self.w_hh_l1 = torch.nn.Parameter(ornn.weight_hh_l1.T)
            self.b_hh_l1 = torch.nn.Parameter(ornn.bias_hh_l1)
            
    def forward(self, x): # x:(seq_len, batch_size, data_dim)
        for i in range(seq_len):
            # 第一层，应用公式
            igates = torch.matmul(x[i], self.w_ih) + self.b_ih
            hgates = torch.matmul(self.ht, self.w_hh) + self.b_hh
            self.ht = torch.tanh(igates + hgates) # ht 更新
            if num_layers == 2:
                igates = torch.matmul(self.ht, self.w_ih_l1) + self.b_ih_l1
                hgates = torch.matmul(self.ht1, self.w_hh_l1) + self.b_hh_l1
                self.ht1 = torch.tanh(igates + hgates) # ht1 更新
            ht_final_layer = [self.ht, self.ht1]
            self.myoutput.append(self.ht1)  # 仅仅保留最后一层的输出
        return ht_final_layer, self.myoutput
    
myrnn = MyRNN()
myht, myoutput = myrnn.forward(data)
official_output, official_hn = ornn(data, h0)

print('myht:')
print(myht)
print('official_hn:')
print(official_hn)

print("--" * 40)
print('myoutput:')
print(myoutput)
print('official_output:')
print(official_output)

myht:
[tensor([[ 5.7555e-01,  6.3959e-02, -4.8237e-04,  3.6376e-01, -2.6510e-01,
          4.7944e-02,  2.5967e-01, -3.6259e-01,  8.1449e-01, -7.0938e-01,
          6.8870e-01, -8.3581e-01, -6.6371e-01,  5.6467e-01,  6.0072e-01,
          1.5433e-01, -4.4449e-01, -3.4524e-02, -7.6776e-01,  5.6837e-02],
        [-4.6934e-03, -2.8252e-01,  2.3753e-01, -8.6996e-01, -4.3868e-01,
         -2.7403e-02,  5.4931e-01, -1.3780e-01,  1.2170e-02, -2.2654e-01,
          7.5068e-01, -9.0088e-01,  1.9875e-01,  3.6503e-01, -9.1360e-01,
         -4.6566e-01, -4.4158e-01, -2.4151e-01,  4.4735e-01, -6.7096e-02],
        [ 1.3250e-01,  2.9181e-01,  6.0930e-01,  9.4492e-01, -1.2857e-01,
         -6.6635e-01, -7.5297e-01, -8.0852e-01,  7.5963e-01,  5.8442e-01,
          1.9924e-01,  1.7311e-01, -6.5252e-01,  8.7590e-01, -2.6455e-02,
          4.3857e-01, -1.3647e-01, -7.3224e-01, -1.3205e-01,  3.8846e-01]],
       grad_fn=<TanhBackward0>), tensor([[-0.4547, -0.5596,  0.3929, -0.0198,  0.4176,  0.0522,  0.13

# LSTM

In [1]:
import torch.nn as nn
import torch

In [2]:
lstm = nn.LSTM(10,20,num_layers = 2, bidirectional = True)

# text:[序列长度，batch_size，词向量的维度 input_size]
text = torch.randn(5,3,10) 
# h_0: [num_layers * num_directions， batch_size, hidden_size]
h_0 = torch.randn(4,3,20)
# c_0: [num_layers * num_directions, batch_size, hidden_size]
c_0 = torch.randn(4,3,20)
output, (h_n, c_n) = lstm(text)

# output: [seq_length, batch_size, num_directions * hidden_size]
print(output.shape)
# h_n: [num_layers * num_directions, batch_size, hidden_size]
print(h_n.size())
# c_n:[num_layers * num_directions, batch_size, hidden_size]
print(c_n.shape)

torch.Size([5, 3, 40])
torch.Size([4, 3, 20])
torch.Size([4, 3, 20])


In [3]:
import torch
import torch.nn as nn

batch_size = 10
seq_length = 20 # 句子的长度
dictionary_size = 100 # 词典中词语的数量
embedding_dim = 30 # 长度为30的向量表示一个词语
hidden_size = 18
num_layer = 2

# 构造一个batch的数据
text = torch.randint(low = 0, high = 100, size = [batch_size, seq_length])
print(text.shape)

# 数据经过embedding处理
embedding = nn.Embedding(dictionary_size, embedding_dim)
text_embedded = embedding(text)

# 传入LSTM
lstm = nn.LSTM(input_size = embedding_dim,
              hidden_size = hidden_size,
              num_layers = num_layer,
              batch_first = True)

'''
output: [batch_size, seq_length, num_directions * hidden_size]
h_n: [num_layers * num_directions, batch_size, hidden_size]
c_n: [num_layers * num_directions, batch_size, hidden_size]
'''
output, (h_n, c_n) = lstm(text_embedded)

# output把每一个时间步上的结果在seq_length这一维度上进行了拼接
print(output.shape) # torch.Size([10,20,18])
print(f"{'*' * 20}")

# h_n把不同层的隐藏状态在第0个维度上进行了拼接
print(h_n.size()) # torch,Size([2,10,18])
print(f"{'*' * 20}")

print(c_n.shape)  # torch.Size([2, 10, 18])
print(f"{'*' * 20}")

# 最后一次的h_1应该和output的最后一个time step的输出是一样的

# 获取最后一个时间步上的输出
last_output = output[:, -1, :]

# 获取最后一次的hidden_state
last_hidden_state = h_n[-1, :, :]

print(last_output == last_hidden_state)

torch.Size([10, 20])


'\noutput: [batch_size, seq_length, num_directions * hidden_size]\nh_n: [num_layers * num_directions, batch_size, hidden_size]\nc_n: [num_layers * num_directions, batch_size, hidden_size]\n'

torch.Size([10, 20, 18])
********************
torch.Size([2, 10, 18])
********************
torch.Size([2, 10, 18])
********************
tensor([[True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, 

In [None]:
# 交叉熵损失：把softmax概率传入对数似然损失得到的损失函数称为交叉熵损失：
# 在pytorch中有两种方法实现交叉熵损失：
criterion=nn.CrossEntropyLoss()
loss=criterion(input, target)

In [5]:
# 1、对输出值计算softmax和取对数
output=F.log_softmax(x,dim=-1)
# 2、使用torch中带权损失
loss=F.nll_loss(output,target)

NameError: name 'x' is not defined

### 双向LSTM

In [8]:
import torch
import torch.nn as nn

batch_size = 10
seq_length = 20
dictionary_size = 100
embedding_dim = 30
hidden_size = 18
num_layer = 1

# 构造一个batch的数据
text = torch.randint(low = 0, high = 100, size = [batch_size, seq_length])
print(text.shape)
print(f"{'*' * 20}")

# 数据经过embedding处理
embedding = nn.Embedding(dictionary_size, embedding_dim)
text_embedded = embedding(text)
print(text_embedded.shape)

# 传入LSTM
lstm = nn.LSTM(input_size = embedding_dim,
              hidden_size = hidden_size,
              num_layers = num_layer,
              batch_first = True, bidirectional = True)

"""
output: [batch_size, seq_length, num_directions * hidden_size]
h_n: [num_layers * num_directions, batch_size, hidden_size]
c_n: [num_layers * num_directions, batch_size, hidden_size]
"""
output, (h_n, c_n) = lstm(text_embedded)

# output把每一个时间步上的结果在seq_length这一维度上进行了拼接
# 如果lstm是双向的，则output的num_directions * hidden_size维度中前面是前hidden_size个数据是正向lstm的输出，
# 后hidden_size个数据是反向lstm的输出
print(output.shape)  # torch.Size([10, 20, 36])
print(f"{'*' * 20}")

# h_n把不同层的隐层状态在第0个维度上进行了拼接
# h_n把双向lstm中正向的hidden_state和反向的hidden_state在第0个维度上进行了拼接
print(h_n.size())
print(f"{'*' * 20}")

print(c_n.shape)
print(f"{'*' * 20}")

# 获取双向lstm中正向的最后一个时间步output
forward_output = output[:, -1, :18]
print(forward_output.shape)
print(f"{'*' * 20}")

# 获取双向lstm中正向的最后一个hidden_state
forward_h_n = h_n[-2,:,:]
print(forward_h_n.shape)
print(f"{'*' * 20}")

print(f"正向output和正向h_n是否相等：{forward_output == forward_h_n}")

# 获取双向lstm中反向最后一个时间步的output
backward_output = output[:, 0, 18:]
print(backward_output.shape)
print(f"{'*' * 20}")

# 获取双向lstm中反向的最后一个hidden_state
backward_h_n = h_n[-1, :, :]
print(backward_h_n.shape)
print(f"{'*' * 20}")

print(f"反向output和反向h_n是否相等：{backward_output == backward_h_n}")

torch.Size([10, 20])
********************
torch.Size([10, 20, 30])


'\noutput: [batch_size, seq_length, num_directions * hidden_size]\nh_n: [num_layers * num_directions, batch_size, hidden_size]\nc_n: [num_layers * num_directions, batch_size, hidden_size]\n'

torch.Size([10, 20, 36])
********************
torch.Size([2, 10, 18])
********************
torch.Size([2, 10, 18])
********************
torch.Size([10, 18])
********************
torch.Size([10, 18])
********************
正向output和正向h_n是否相等：tensor([[True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],

In [35]:
import torch
import torch.nn as nn

batch_size = 10
seq_length = 20 # 句子的长度
dictionary_size = 100 # 词典中词语的数量
embedding_dim = 30 # 长度为30的向量表示一个词语
hidden_size = 18
num_layer = 2

# 构造一个batch的数据
text = torch.randint(low=0, high=100, size=[batch_size, seq_length])

print(text.shape)

# 数据经过embedding处理
embedding = nn.Embedding(dictionary_size, embedding_dim)
text_embedded = embedding(text)

# 传入LSTM
lstm = nn.LSTM(input_size=embedding_dim,
               hidden_size=hidden_size,
               num_layers=num_layer,
               batch_first=True, bidirectional = True)

'''
output: [batch_size, seq_length, num_directions * hidden_size]
h_n: [num_layers * num_directions, batch_size, hidden_size]
c_n: [num_layers * num_directions, batch_size, hidden_size]
'''
output, (h_n, c_n) = lstm(text_embedded)

# output把每一个时间步上的结果在seq_length这一维度上进行了拼接
print(output.shape)  # torch.Size([10, 20, 36])
print(f"{'*' * 20}")

# h_n把不同层的隐藏状态在第0个维度上进行了拼接
print(h_n.size())  # torch.Size([4, 10, 18])
print(f"{'*' * 20}")

print(c_n.shape)  # torch.Size([4, 10, 18])
print(f"{'*' * 20}")

# 最后一次的h_1应该和output的最后一个time step的输出是一样的

# 获取最后一个时间步上的输出
last_output = output[:, 0, 18:]

# 获取最后一次的hidden_state
last_hidden_state = h_n[-1, :, :]

'''
-4/1 第一层的正向
-3/2 第一层的反向
-2/3 第二层的正向    对应的是[:, -1, :18]
-1/4 第二层的反向    对应的是[:, 0,  18:]
这些相关的方向暂时没有搞懂
'''

print(last_output == last_hidden_state)



torch.Size([10, 20])


'\noutput: [batch_size, seq_length, num_directions * hidden_size]\nh_n: [num_layers * num_directions, batch_size, hidden_size]\nc_n: [num_layers * num_directions, batch_size, hidden_size]\n'

torch.Size([10, 20, 36])
********************
torch.Size([4, 10, 18])
********************
torch.Size([4, 10, 18])
********************


'\n-4/1 第一层的正向\n-3/2 第一层的反向\n-2/3 第二层的正向    对应的是[:, -1, :18]\n-1/4 第二层的反向    对应的是[:, 0,  18:]\n这些相关的方向暂时没有搞懂\n'

tensor([[True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, Tru

## 使用双向LSTM实现文本情感分类

#### 这段代码跑不通

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from dataset import get_dataloader
from pkl import ws, MAX_LEN
from datetime import datetime
from tqdm import tqdm

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

class ImdbModule(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ImdbModule, self).__init__()
        self.embedding = nn.Embedding(len(ws), input_size, padding_idx=ws.PAD)
        self.hidden_size = hidden_size
        
        '''
        nn.LSTM:
            Args:
                input_size: The number of expected features in the input `x`
                hidden_size: The number of features in the hidden state `h`
                num_layers: Number of recurrent layers.
                bias: If `False`, then the layer does not use bias weights `b_ih` and `b_hh`.
                batch_first: If `True`, then the input and output tensors are provided as (batch, seq, feature).
                bidirectional: If `True`, becomes a bidirectional LSTM.
        '''

        '''
        input:
            x: [seq_length, batch_size, input_size] (tensor containing the features of the input sequence.)
            h_0: [num_layers * num_directions, batch_size, hidden_size] (tensor
              containing the initial hidden state for each element in the batch.)
            c_0: [num_layers * num_directions, batch_size, hidden_size] (tensor
              containing the initial cell state for each element in the batch.)
        return: output, (h_n, c_n):
            output: [seq_length, batch_size, num_directions * hidden_size]
            h_n: [num_layers * num_directions, batch_size, hidden_size]
            c_n: [num_layers * num_directions, batch_size, hidden_size]
        '''
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=self.hidden_size,
                            num_layers=2, bidirectional=True)

        self.linear = nn.Linear(2 * self.hidden_size, output_size)

    def forward(self, x):
        '''
        :param x: [batch_size, seq_length]
        :param h_0: [num_layers * num_directions, batch_size, hidden_size]
        :param c_0: [num_layers * num_directions, batch_size, hidden_size]
        :return:
        '''

        batch_size = x.size(0)

        # x: [batch_size, seq_length, input_size]
        x = self.embedding(x)

        # x: [seq_length, batch_size, input_size]
        x = x.permute(1, 0, 2)

        # output: [seq_length, batch_size, num_directions * hidden_size]
        # h_n: [num_layers * num_directions, batch_size, hidden_size]
        # c_n: [num_layers * num_directions, batch_size, hidden_size]
        output, (h_n, c_n) = self.lstm(x)

        # 往往会使用LSTM or GRU输出的最后一维结果来代表LSTM、GRU对文本处理的结果
        # 使用双向LSTM的时候，往往会使用每个方向最后一次的output，作为当前数据经过双向LSTM的结果

        out = torch.cat((h_n[-2, :, :], h_n[-1, :, :]), dim=-1)
        out = self.linear(out)

        return out


TRAIN_BATCH_SIZE = 128
TEST_BATCH_SIZE = 128
LR = 0.001

imdb = ImdbModule(100, 256, 11).to(device)

optimizer = optim.Adam(imdb.parameters(), lr=LR)

criterion = nn.CrossEntropyLoss().to(device)


def train_test(epoch):
    print(f"{'-' * 10}epoch: {epoch + 1}{'-' * 10}")

    mode = True
    imdb.train(mode)

    train_dataloader, train_data_length = get_dataloader(mode='train', batch_size=TRAIN_BATCH_SIZE)

    for idx, (text, label) in enumerate(train_dataloader):
        text = text.to(device)
        label = label.to(device)

        optimizer.zero_grad()

        # 第一次调用LSTM模型之前，需要初始化隐藏状态，如果不初始化，默认创建全为0的隐藏状态
        output = imdb(text)

        loss = criterion(output, label)

        loss.backward()
        optimizer.step()

        if idx % 50 == 0:
            print(f"第{epoch}轮训练次数为{idx}的误差：{loss.item()}")

    print(f"{'-' * 10}测试开始{'-' * 10}")

    imdb.eval()

    test_dataloader, len_test_data = get_dataloader('test', batch_size=TEST_BATCH_SIZE)

    sum_loss = 0
    total_accuracy = 0

    with torch.no_grad():
        for text, label in tqdm(test_dataloader):
            text = text.to(device)
            label = label.to(device)

            output = imdb(text)

            loss = criterion(output, label)
            sum_loss += loss

            predicted = output.argmax(1)
            accuracy = (predicted == label).sum()

            total_accuracy += accuracy

    print(f"测试集上的loss：{sum_loss}")

    correct_accuracy = total_accuracy / len_test_data
    print(f"整体测试集上的正确率：{correct_accuracy}%")

    print("模型保存成功")
    torch.save(imdb.state_dict(), f'./model/lstm_{epoch}.pth')

    now = datetime.now()
    now = now.strftime("%Y-%m-%d %H:%M:%S")

    content = f"time：{now}\tlstm模型在测试集上的准确率：{correct_accuracy}"

    with open('./accuracy.txt', 'a+', encoding='utf-8') as file:
        file.write(content + '\n')

for epoch in range(100):
    train_test(epoch)


## pytorch代码实现（GRU）

In [2]:
import torch.nn as nn
import torch

'''
GRU:
    Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.

    args:
        input_size: The number of expected features in the input `x`
        hidden_size: The number of features in the hidden state `h`
        num_layers: Number of recurrent layers.
        bias: If `False`, then the layer does not use bias weights
        bidirectional: If `True`, becomes a bidirectional GRU.
'''

'''
Inputs: input, h_0
    input: [seq_length, batch_size, input_size]
    h_0: [num_layers * num_directions, batch_size, hidden_size]

Outputs: output, h_n
    output: [seq_length, batch_size, num_directions * hidden_size]
    h_n: [num_layers * num_directions, batch_size, hidden_size]
'''

# 双层双向GRU
gru = nn.GRU(input_size=10, hidden_size=20, num_layers=2, bidirectional=True)

# text: [seq_length, batch_size, input_size] 
text = torch.randn(5, 3, 10)

# h_0: [num_layers * num_directions, batch_size, hidden_size]
h_0 = torch.randn(4, 3, 20)

'''
output: [seq_length, batch_size, num_directions * hidden_size]
h_n: [num_layers * num_directions, batch_size, hidden_size]
'''
output, h_n = gru(text, h_0)

# 获取双向gru中正向最后一个时间步的output
forward_output = output[-1,:,:20]
print(forward_output.shape) # [batch_size, hidden_size]
print(f"{'*' * 20}")

# 获取双向gru中正向的最后一个hidden_state
forward_h_n=h_n[-2,:,:]
print(forward_h_n.shape) # [batch_size, hidden_size]
print(f"{'*' * 20}")

print(f"正向output和正向h_n是否相等：{forward_output == forward_h_n}")

# 获取双向gru中反向最后一个时间步的output  正向的为前20，反向的为后20
backward_output=output[0,:,20:]
print(backward_output.shape) # [batch_size, hidden_size]
print(f"{'*' * 20}")

# 获取双向gru中反向的最后一个hidden_state
backward_h_n=h_n[-1,:,:]
print(backward_h_n.shape)
print(f"{'*' * 20}")

print(f"反向output和反向h_n是否相等：{backward_output == backward_h_n}")


'\nGRU:\n    Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.\n\n    args:\n        input_size: The number of expected features in the input `x`\n        hidden_size: The number of features in the hidden state `h`\n        num_layers: Number of recurrent layers.\n        bias: If `False`, then the layer does not use bias weights\n        bidirectional: If `True`, becomes a bidirectional GRU.\n'

'\nInputs: input, h_0\n    input: [seq_length, batch_size, input_size]\n    h_0: [num_layers * num_directions, batch_size, hidden_size]\n\nOutputs: output, h_n\n    output: [seq_length, batch_size, num_directions * hidden_size]\n    h_n: [num_layers * num_directions, batch_size, hidden_size]\n'

'\noutput: [seq_length, batch_size, num_directions * hidden_size]\nh_n: [num_layers * num_directions, batch_size, hidden_size]\n'

torch.Size([3, 20])
********************
torch.Size([3, 20])
********************
正向output和正向h_n是否相等：tensor([[True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True]])
torch.Size([3, 20])
********************
torch.Size([3, 20])
********************
反向output和反向h_n是否相等：tensor([[True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, 

## 使用双向GRU实现文本情感分类

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from dataset import get_dataloader
from pkl import ws, MAX_LEN
from datetime import datetime
from tqdm import tqdm

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


class ImdbModule(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ImdbModule, self).__init__()

        self.embedding = nn.Embedding(len(ws), input_size, padding_idx=ws.PAD)

        self.hidden_size = hidden_size

        '''
        GRU:
            Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.
            
            args:
                input_size: The number of expected features in the input `x`
                hidden_size: The number of features in the hidden state `h`
                num_layers: Number of recurrent layers.
                bias: If `False`, then the layer does not use bias weights
                bidirectional: If `True`, becomes a bidirectional GRU.
        '''

        '''
        Inputs: input, h_0
            input: [seq_length, batch, input_size]
            h_0: [num_layers * num_directions, batch_size, hidden_size]
            
        Outputs: output, h_n
            output: [seq_length, batch_size, num_directions * hidden_size]
            h_n: [num_layers * num_directions, batch_size, hidden_size]
        '''

        self.gru = nn.GRU(input_size=input_size,
                          hidden_size=self.hidden_size,
                          num_layers=2,
                          bidirectional=True,
                          dropout=0.5)

        self.linear = nn.Linear(in_features=2 * hidden_size, out_features=output_size)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        '''
        :param x: [batch_size, seq_length]
        :return:
        '''

        batch_size = x.size(0)

        # x: [batch_size, seq_length, embedding_dim]
        x = self.embedding(x)

        # x: [seq_length, batch_size, input_size]
        x = x.permute(1, 0, 2)

        '''
        output: [seq_length, batch_size, num_directions * hidden_size]
        h_n: [num_layers * num_directions, batch_size, hidden_size]
        '''
        output, h_n = self.gru(x)

        h_n = torch.cat((h_n[-2, :, :], h_n[-1, :, :]), dim=-1)

        out = self.linear(h_n)

        return out


TRAIN_BATCH_SIZE = 128
TEST_BATCH_SIZE = 128
LR = 0.001

imdb = ImdbModule(256, 256, 11).to(device)

optimizer = optim.Adam(imdb.parameters(), lr=LR)

criterion = nn.CrossEntropyLoss().to(device)


def train_test(epoch):
    print(f"{'-' * 10}epoch: {epoch + 1}{'-' * 10}")

    mode = True
    imdb.train(mode)

    train_dataloader, train_data_length = get_dataloader(mode='train', batch_size=TRAIN_BATCH_SIZE)

    for idx, (text, label) in enumerate(train_dataloader):
        text = text.to(device)
        label = label.to(device)

        optimizer.zero_grad()

        # 第一次调用LSTM模型之前，需要初始化隐藏状态，如果不初始化，默认创建全为0的隐藏状态
        output = imdb(text)

        loss = criterion(output, label)

        loss.backward()
        optimizer.step()

        if idx % 50 == 0:
            print(f"第{epoch}轮训练次数为{idx}的误差：{loss.item()}")

    print(f"{'-' * 10}测试开始{'-' * 10}")

    imdb.eval()

    test_dataloader, len_test_data = get_dataloader('test', batch_size=TEST_BATCH_SIZE)

    sum_loss = 0
    total_accuracy = 0

    with torch.no_grad():
        for text, label in tqdm(test_dataloader):
            text = text.to(device)
            label = label.to(device)

            output = imdb(text)

            loss = criterion(output, label)
            sum_loss += loss

            predicted = output.argmax(1)
            accuracy = (predicted == label).sum()

            total_accuracy += accuracy

    print(f"测试集上的loss：{sum_loss}")

    correct_accuracy = total_accuracy / len_test_data
    print(f"整体测试集上的正确率：{correct_accuracy}%")

    print("模型保存成功")
    torch.save(imdb.state_dict(), f'./model/lstm_{epoch}.pth')

    now = datetime.now()
    now = now.strftime("%Y-%m-%d %H:%M:%S")

    content = f"time：{now}\tlstm模型在测试集上的准确率：{correct_accuracy}"

    with open('./accuracy.txt', 'a+', encoding='utf-8') as file:
        file.write(content + '\n')


for epoch in range(100):
    train_test(epoch)
