In [1]:
import torch
import torch.nn as nn

In [2]:
# Build the reference layer.
# nn.GRU arguments used here:
#   input_size  - feature dimension of the input tensor x
#   hidden_size - dimension of the hidden state (number of hidden units)
#   num_layers  - number of stacked hidden layers
input_size, hidden_size, num_layers = 5, 6, 2
seq_len, batch_size = 1, 3
gru = nn.GRU(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    bidirectional=True,
)

# Input tensor input1, dimensions in order:
#   seq_len (sequence length), batch_size (samples per batch),
#   input_size (feature dimension of x)
input1 = torch.randn((seq_len, batch_size, input_size))
# Initial hidden state h0, dimensions in order:
#   num_layers * num_directions (2 directions: the layer is bidirectional),
#   batch_size, hidden_size
h0 = torch.randn((2 * num_layers, batch_size, hidden_size))

output, hn = gru(input1, h0)
# One shape per line: first the per-step output, then the final hidden state.
print(output.shape, hn.shape, sep="\n")

torch.Size([1, 3, 12])
torch.Size([4, 3, 6])


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:

# class CustomGRU(nn.Module):
#     def __init__(self, input_size, hidden_size, num_layers=1, bidirectional=False):
#         super(CustomGRU, self).__init__()
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
#         self.bidirectional = bidirectional
#         self.num_directions = 2 if bidirectional else 1
        
#         # 定义每一层的参数
#         self.gru_cells = nn.ModuleList()
#         for layer in range(num_layers):
#             input_dim = input_size if layer == 0 else hidden_size * self.num_directions
#             self.gru_cells.append(GRUCell(input_dim, hidden_size))

#     def forward(self, x, h_0=None):
#         seq_len, batch_size, _ = x.size()

#         # 初始化隐藏状态
#         if h_0 is None:
#             h_0 = torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size, device=x.device)

#         h_n = []
#         for layer in range(self.num_layers):
#             # 只提取对应层的隐藏状态，避免 h 维度不匹配，因此添加了squeeze(0)
#             h = h_0[layer * self.num_directions: (layer + 1) * self.num_directions].squeeze(0)
#             outputs = []
#             for t in range(seq_len):
#                 h = self.gru_cells[layer](x[t], h)
#                 outputs.append(h)
#             x = torch.stack(outputs, dim=0)  # 叠加时间步
#             h_n.append(h)
        
#         # 返回所有层的最后一个隐藏状态
#         return x, torch.stack(h_n, dim=0)

# class GRUCell(nn.Module):
#     def __init__(self, input_size, hidden_size):
#         super(GRUCell, self).__init__()
#         self.hidden_size = hidden_size

#         # 定义 GRU 的权重矩阵
#         self.W_z = nn.Linear(input_size + hidden_size, hidden_size)
#         self.W_r = nn.Linear(input_size + hidden_size, hidden_size)
#         self.W_h = nn.Linear(input_size + hidden_size, hidden_size)

#     def forward(self, x, h):
#         # 拼接输入和隐藏状态
#         combined = torch.cat([x, h], dim=-1)

#         # 计算更新门和重置门
#         z_t = torch.sigmoid(self.W_z(combined))
#         r_t = torch.sigmoid(self.W_r(combined))

#         # 计算候选隐藏状态
#         combined_r = torch.cat([x, r_t * h], dim=-1)
#         h_tilde = torch.tanh(self.W_h(combined_r))

#         # 计算新的隐藏状态
#         h_new = (1 - z_t) * h + z_t * h_tilde

#         return h_new


In [2]:
# Multi-layer GRU with optional bidirectional processing.
class CustomGRU(nn.Module):
    """Stacked GRU assembled from ``GRUCell`` modules, optionally bidirectional.

    ``forward`` unpacks ``x.size()`` as ``(seq_len, batch_size, feature)``,
    i.e. inputs are time-major.

    Args:
        input_size: Feature size of the input fed to the first layer.
        hidden_size: Hidden-state size of every cell.
        num_layers: Number of stacked layers.
        bidirectional: If True, each layer owns a second cell that walks the
            sequence in reverse time order; both outputs are concatenated on
            the last dimension before being passed to the next layer.

    Note:
        The returned final state has one entry per *layer*, with both
        directions concatenated on the last dimension:
        ``(num_layers, batch_size, num_directions * hidden_size)``.
        The initial state ``h_0`` is instead indexed per layer *and*
        direction: ``(num_layers * num_directions, batch_size, hidden_size)``.
        For a bidirectional model the returned state therefore cannot be
        passed back unchanged as ``h_0``.
    """

    def __init__(self, input_size, hidden_size, num_layers=1, bidirectional=False):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        # Cells are stored flat: index layer * num_directions is the forward
        # cell of a layer, the following index (if any) its backward cell.
        self.gru_cells = nn.ModuleList()
        for layer in range(num_layers):
            # Layers above the first consume the (possibly concatenated)
            # output of the layer below.
            input_dim = input_size if layer == 0 else hidden_size * self.num_directions
            self.gru_cells.append(GRUCell(input_dim, hidden_size))
            if bidirectional:
                self.gru_cells.append(GRUCell(input_dim, hidden_size))  # backward cell

    def forward(self, x, h_0=None):
        """Run the whole sequence through every layer.

        Args:
            x: Tensor unpacked as ``(seq_len, batch_size, feature)``.
            h_0: Optional initial state indexed as
                ``h_0[layer * num_directions + direction]``. Defaults to zeros.

        Returns:
            A tuple ``(output, h_n)``: the last layer's output for every time
            step, and the stacked per-layer final states (see class Note).
        """
        seq_len, batch_size, _ = x.size()

        if h_0 is None:
            # Match the input's device AND dtype so the default state does not
            # clash with half/double inputs and parameters.
            h_0 = torch.zeros(
                self.num_layers * self.num_directions,
                batch_size,
                self.hidden_size,
                device=x.device,
                dtype=x.dtype,
            )

        h_n = []
        for layer in range(self.num_layers):
            base = layer * self.num_directions

            # Forward direction: t = 0 .. seq_len - 1.
            h_fwd = h_0[base]
            outputs_fwd = []
            for t in range(seq_len):
                h_fwd = self.gru_cells[base](x[t], h_fwd)
                outputs_fwd.append(h_fwd)
            outputs_fwd = torch.stack(outputs_fwd, dim=0)

            if self.bidirectional:
                # Backward direction: t = seq_len - 1 .. 0.
                h_bwd = h_0[base + 1]
                outputs_bwd = []
                for t in reversed(range(seq_len)):
                    h_bwd = self.gru_cells[base + 1](x[t], h_bwd)
                    outputs_bwd.append(h_bwd)
                # Steps were collected last-to-first; restore time order so
                # they line up with the forward outputs.
                outputs_bwd = torch.stack(outputs_bwd[::-1], dim=0)

                # This layer's output becomes the next layer's input.
                x = torch.cat([outputs_fwd, outputs_bwd], dim=-1)
                h_n.append(torch.cat([h_fwd, h_bwd], dim=-1))
            else:
                x = outputs_fwd
                h_n.append(h_fwd)

        # Last layer's outputs for all time steps, plus every layer's final state.
        return x, torch.stack(h_n, dim=0)

class GRUCell(nn.Module):
    """One GRU time step built from three linear layers.

    Each gate is a linear map applied to the concatenation of the current
    input and a hidden-state term, both joined on the last dimension.

    Args:
        input_size: Size of the last dimension of ``x``.
        hidden_size: Size of the last dimension of ``h`` and of the result.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        # All three maps share the same fan-in: [input, hidden] concatenated.
        fan_in = input_size + hidden_size
        self.W_z = nn.Linear(fan_in, hidden_size)  # update gate
        self.W_r = nn.Linear(fan_in, hidden_size)  # reset gate
        self.W_h = nn.Linear(fan_in, hidden_size)  # candidate state

    def forward(self, x, h):
        """Return the next hidden state for input ``x`` and previous state ``h``."""
        gate_input = torch.cat((x, h), dim=-1)

        update_gate = self.W_z(gate_input).sigmoid()
        reset_gate = self.W_r(gate_input).sigmoid()

        # The candidate sees the previous state only through the reset gate.
        candidate = self.W_h(torch.cat((x, reset_gate * h), dim=-1)).tanh()

        # Blend: update_gate -> 0 keeps h, update_gate -> 1 takes the candidate.
        return (1 - update_gate) * h + update_gate * candidate


In [3]:
# Same sizes as the nn.GRU reference cell, this time driving CustomGRU.
input_size, hidden_size, num_layers = 5, 6, 2
seq_len, batch_size = 1, 3
gru = CustomGRU(
    input_size,
    hidden_size,
    num_layers=num_layers,
    bidirectional=True,
)
# Input dimensions in order: seq_len, batch_size, input_size.
input1 = torch.randn((seq_len, batch_size, input_size))
# No initial state is passed, so the model uses its default.
output, hn = gru(input1)
# One shape per line: per-step output first, then the final hidden state.
print(output.shape, hn.shape, sep="\n")

torch.Size([1, 3, 12])
torch.Size([2, 3, 12])


In [9]:
# Multi-layer GRU; supports both unidirectional and bidirectional operation.
class MyGRU(nn.Module):
    """Stacked GRU assembled from ``GRU_Cell`` modules, optionally bidirectional.

    ``forward`` unpacks ``inputs.size()`` as ``(seq_len, batch_size, feature)``,
    i.e. inputs are time-major.

    Args:
        input_size: Feature size of the input fed to the first layer.
        hidden_size: Hidden-state size of every cell.
        num_layers: Number of stacked layers.
        bidirectional: If True, each layer owns a second cell that walks the
            sequence in reverse time order; both outputs are concatenated on
            the last dimension before being passed to the next layer.

    Note:
        The returned final state has one entry per *layer*, with both
        directions concatenated on the last dimension:
        ``(num_layers, batch_size, num_directions * hidden_size)``.
        The initial state ``h0`` is instead indexed per layer *and*
        direction: ``(num_layers * num_directions, batch_size, hidden_size)``.
    """

    def __init__(self, input_size, hidden_size, num_layers, bidirectional=False):
        super(MyGRU, self).__init__()
        # Network hyper-parameters.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if self.bidirectional else 1

        # One cell per (layer, direction), stored flat so that index
        # layer * num_directions is the forward cell and the next index (when
        # bidirectional) is the backward cell. forward() relies on this layout.
        self.gru_layers = nn.ModuleList()
        for layer in range(self.num_layers):
            # Layers above the first consume the (possibly concatenated)
            # output of the layer below.
            in_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions
            for _ in range(self.num_directions):
                self.gru_layers.append(GRU_Cell(in_size, self.hidden_size))

    def forward(self, inputs, h0=None):
        """Run the whole sequence through every layer.

        Args:
            inputs: Tensor unpacked as ``(seq_len, batch_size, feature)``.
            h0: Optional initial state indexed as
                ``h0[layer * num_directions + direction]``. Defaults to zeros.

        Returns:
            A tuple ``(output, h_n)``: the last layer's output for every time
            step, and the stacked per-layer final states (see class Note).
        """
        seq_len, batch_size, _ = inputs.size()

        if h0 is None:
            # Allocate on the input's device and with its dtype so the default
            # state works for GPU and non-float32 inputs.
            h0 = torch.zeros(
                self.num_layers * self.num_directions,
                batch_size,
                self.hidden_size,
                device=inputs.device,
                dtype=inputs.dtype,
            )

        h_n = []
        for layer in range(self.num_layers):
            base = layer * self.num_directions

            # Forward direction: t = 0 .. seq_len - 1.
            h_fwd = h0[base]
            outputs_fwd = []
            for t in range(seq_len):
                h_fwd = self.gru_layers[base](inputs[t], h_fwd)
                outputs_fwd.append(h_fwd)
            outputs_fwd = torch.stack(outputs_fwd, dim=0)

            if self.bidirectional:
                # Backward direction: t = seq_len - 1 .. 0.
                h_bwd = h0[base + 1]
                outputs_bwd = []
                for t in reversed(range(seq_len)):
                    h_bwd = self.gru_layers[base + 1](inputs[t], h_bwd)
                    outputs_bwd.append(h_bwd)
                # outputs_bwd is a Python list collected last-to-first;
                # reverse it to restore time order before stacking.
                outputs_bwd = torch.stack(outputs_bwd[::-1], dim=0)

                # This layer's output becomes the next layer's input.
                inputs = torch.cat([outputs_fwd, outputs_bwd], dim=-1)
                h_n.append(torch.cat([h_fwd, h_bwd], dim=-1))
            else:
                inputs = outputs_fwd
                h_n.append(h_fwd)

        return inputs, torch.stack(h_n, dim=0)
                    
        
            


class GRU_Cell(nn.Module):
    """One GRU time step built from three linear layers.

    Each gate is a linear map applied to the concatenation of the current
    input and a hidden-state term, both joined on the last dimension.

    Args:
        input_size: Size of the last dimension of ``inputs``.
        hidden_size: Size of the last dimension of ``h_pre`` and of the result.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        # Sizes of this single cell.
        self.input_size = input_size
        self.hidden_size = hidden_size

        # All three maps share the same fan-in: [input, hidden] concatenated.
        fan_in = self.input_size + self.hidden_size
        self.w_z = nn.Linear(fan_in, self.hidden_size)  # update gate
        self.w_r = nn.Linear(fan_in, self.hidden_size)  # reset gate
        self.w_t = nn.Linear(fan_in, self.hidden_size)  # candidate state

    def forward(self, inputs, h_pre):
        """Return the next hidden state for ``inputs`` and previous state ``h_pre``."""
        gate_input = torch.cat((inputs, h_pre), dim=-1)

        # Update and reset gates.
        update_gate = self.w_z(gate_input).sigmoid()
        reset_gate = self.w_r(gate_input).sigmoid()

        # The candidate sees the previous state only through the reset gate.
        candidate_input = torch.cat((inputs, reset_gate * h_pre), dim=-1)
        candidate = self.w_t(candidate_input).tanh()

        # Blend: update_gate -> 0 keeps h_pre, update_gate -> 1 takes the candidate.
        return (1 - update_gate) * h_pre + update_gate * candidate

In [12]:
# Same sizes as the earlier demos, this time driving a unidirectional MyGRU.
input_size, hidden_size, num_layers = 5, 6, 2
seq_len, batch_size = 1, 3
gru = MyGRU(
    input_size,
    hidden_size,
    num_layers,
    bidirectional=False,
)
# The sequence fed in is twice seq_len long; seq_len itself stays 1.
input1 = torch.randn((seq_len * 2, batch_size, input_size))
# No initial state is passed, so the model uses its default.
output, hn = gru(input1)
# One shape per line: per-step output first, then the final hidden state.
print(output.shape, hn.shape, sep="\n")

torch.Size([2, 3, 6])
torch.Size([2, 3, 6])
