In [1]:
# LSTM结构掌握
import torch
import torch.nn as nn

# Build an LSTM layer that consumes batch-first input.
lstm = nn.LSTM(
    input_size=8,
    hidden_size=64,
    num_layers=1,
    batch_first=True,
)

# Random batch laid out as (batch_size=32, seq_len=10, input_dim=8).
x = torch.randn((32, 10, 8))
output, final_state = lstm(x)
h_n, c_n = final_state

# Report the shape of every named parameter of the layer.
for param_name, tensor in lstm.named_parameters():
    print(f"{param_name}: {tensor.shape}")


weight_ih_l0: torch.Size([256, 8])
weight_hh_l0: torch.Size([256, 64])
bias_ih_l0: torch.Size([256])
bias_hh_l0: torch.Size([256])


In [1]:
import numpy as np

# ============================
# LSTM 数学示例（batch_first=True）
# ============================

# Example batch: 2 samples, 3 time steps, 3 features per time step.
x_t = np.array(
    [
        # sample 0, shape (3, 3)
        [[0.5, 0.1, -0.3], [0.3, 0.4, 0.8], [-0.2, 0.6, 0.7]],
        # sample 1, shape (3, 3)
        [[-0.1, 0.2, 0.5], [0.4, -0.3, 0.9], [0.6, 0.1, -0.4]],
    ]
)  # shape (batch_size=2, seq_len=3, input_size=3)

# Hidden state and cell state both start at zero: (batch_size=2, hidden_size=2).
h_prev = np.zeros((2, 2))
c_prev = np.zeros((2, 2))

# Stacked gate parameters. The rows are consumed below in the order
# forget, input, candidate, output (two rows per gate).
W_ih = np.random.randn(8, 3)  # input -> gates, (4 * hidden_size, input_size)
W_hh = np.random.randn(8, 2)  # hidden -> gates, (4 * hidden_size, hidden_size)
b = np.random.randn(8, 1)  # gate bias, (4 * hidden_size, 1)

# Walk through the sequence one time step at a time.
for t in range(x_t.shape[1]):
    # Slice out the current step for the whole batch: (batch_size, input_size).
    x_step = x_t[:, t, :]

    # Step 1: pre-activations of all four gates at once, shape (batch_size, 8).
    z = x_step.dot(W_ih.T) + h_prev.dot(W_hh.T) + b.T

    # Step 2: cut z into four equal column groups and activate each gate.
    z_forget, z_input, z_candidate, z_output = np.split(z, 4, axis=1)
    f_t = 1 / (1 + np.exp(-z_forget))  # forget gate (batch_size, 2)
    i_t = 1 / (1 + np.exp(-z_input))  # input gate (batch_size, 2)
    c_tilde = np.tanh(z_candidate)  # candidate state (batch_size, 2)
    o_t = 1 / (1 + np.exp(-z_output))  # output gate (batch_size, 2)

    # Step 3: blend the old cell state with the new candidate.
    c_prev = f_t * c_prev + i_t * c_tilde

    # Step 4: expose the gated, squashed cell state as the hidden state.
    h_prev = o_t * np.tanh(c_prev)

# After the loop h_prev holds the hidden state of the last time step.
print("h_t (最终隐藏状态):", h_prev)


h_t (最终隐藏状态): [[ 0.32201566 -0.1219501 ]
 [ 0.1272508  -0.10326343]]


In [10]:
import numpy as np
import torch
import torch.nn as nn

# ============================
# 基础流程实现 LSTM (加入 batch_size，batch_first=True)
# ============================

class BasicLSTM(nn.Module):
    """Step-by-step NumPy forward pass of an LSTM over a batch-first sequence.

    The parameters of the four gates are stacked row-wise into single matrices
    so that one matrix product per time step yields every gate pre-activation.
    The row groups are consumed in the order forget, input, candidate, output.
    Weights are stored as plain NumPy arrays.

    Args:
        input_size: Number of features in each time step of the input.
        hidden_size: Width of the hidden state and of the cell state.
    """

    def __init__(self, input_size, hidden_size):
        super(BasicLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Stacked parameters for the four gates (4h rows each).
        self.W = np.random.randn(4 * hidden_size, input_size)  # input weights (4h, n)
        self.U = np.random.randn(4 * hidden_size, hidden_size)  # recurrent weights (4h, h)
        self.b = np.zeros((4 * hidden_size, 1))  # bias (4h, 1)

    def sigmoid(self, x):
        """Return the element-wise logistic function of ``x``."""
        return 1 / (1 + np.exp(-x))

    def tanh(self, x):
        """Return the element-wise hyperbolic tangent of ``x``."""
        return np.tanh(x)

    def forward(self, x_seq):
        """Run the recurrence over every time step of ``x_seq``.

        Args:
            x_seq: Array of shape (batch_size, seq_len, input_size).

        Returns:
            Tuple ``(h, c)`` with the hidden state and cell state after the
            last time step, each of shape (batch_size, hidden_size).
        """
        batch_size, seq_len, _ = x_seq.shape
        hs = self.hidden_size

        # Allocate two independent zero states. Unpacking a single
        # np.zeros((batch_size, hidden_size)) iterates over its rows, which
        # raises for every batch size other than 2 and yields 1-D states
        # when batch_size is 2.
        h = np.zeros((batch_size, hs))
        c = np.zeros((batch_size, hs))

        for t in range(seq_len):
            # Current time step for the whole batch: (batch_size, input_size).
            x = x_seq[:, t, :]

            # Pre-activations of all four gates: (batch_size, 4h).
            z = np.dot(x, self.W.T) + np.dot(h, self.U.T) + self.b.T

            # Slice out each gate; every slice is (batch_size, hidden_size).
            f = self.sigmoid(z[:, :hs])  # forget gate
            i = self.sigmoid(z[:, hs:2 * hs])  # input gate
            c_tilde = self.tanh(z[:, 2 * hs:3 * hs])  # candidate state
            o = self.sigmoid(z[:, 3 * hs:])  # output gate

            # Update the cell state, then derive the hidden state from it.
            c = f * c + i * c_tilde
            h = o * self.tanh(c)

        return h, c

# Toy batch for the NumPy LSTM: (batch_size=2, seq_len=3, input_size=2).
x_seq = np.array(
    [
        [[0.5, -0.2], [0.1, 0.4], [-0.3, 0.8]],  # sample 0
        [[-0.1, 0.3], [0.4, -0.5], [0.6, 0.2]],  # sample 1
    ]
)

# forward() returns the final hidden state and cell state as a tuple.
lstm_basic = BasicLSTM(2, 4)
output_basic = lstm_basic.forward(x_seq)
print("基础流程 LSTM 输出：", output_basic)


基础流程 LSTM 输出： (array([[-0.08845272, -0.32381897, -0.28599911,  0.03722394],
       [-0.26984503,  0.14548591,  0.11242097, -0.24239174]]), array([[-0.15433578, -0.74551582, -0.36123616,  0.14815187],
       [-0.40645539,  0.33771736,  0.17450061, -0.60444706]]))


In [11]:
# ============================
# 高级 API (PyTorch) 实现 LSTM
# ============================


class HighLevelLSTM(nn.Module):
    """Batch-first ``nn.LSTM`` followed by a linear layer on the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of outputs produced by the linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the linear projection of the LSTM output at the last time step.

        The per-step LSTM output and the final hidden and cell states are
        printed along the way for inspection.
        """
        # The LSTM hands back the per-step outputs plus a (hidden, cell) pair.
        seq_out, final_state = self.lstm(x)
        hidden, cell = final_state
        print("LSTM输出:", seq_out)

        # Only the last time step feeds the fully connected layer.
        last_step = seq_out[:, -1, :]
        prediction = self.fc(last_step)

        print("隐藏层数值是:", hidden)
        print("记忆神经元数值是:", cell)
        return prediction


# Input batch (batch_size=2, seq_len=3, input_size=2).
x_torch = torch.tensor(
    [
        [[0.5, -0.2], [0.1, 0.4], [-0.3, 0.8]],  # sample 0
        [[0.6, -0.1], [0.2, 0.5], [-0.2, 0.9]],  # sample 1
    ],
    dtype=torch.float32,
)

# One scalar prediction per sample; flatten to a 1-D array for printing.
lstm_high = HighLevelLSTM(input_size=2, hidden_size=4, output_size=1)
output_high = lstm_high(x_torch)
print("高级 API LSTM 输出：", output_high.detach().numpy().reshape(-1))


LSTM输出: tensor([[[-0.1034, -0.0592, -0.0791, -0.1689],
         [-0.1429, -0.0698, -0.0909, -0.2653],
         [-0.1578, -0.0617, -0.0863, -0.3140]],

        [[-0.1025, -0.0597, -0.0848, -0.1668],
         [-0.1413, -0.0693, -0.0978, -0.2631],
         [-0.1555, -0.0604, -0.0934, -0.3119]]], grad_fn=<TransposeBackward0>)
隐藏层数值是: tensor([[[-0.1578, -0.0617, -0.0863, -0.3140],
         [-0.1555, -0.0604, -0.0934, -0.3119]]], grad_fn=<StackBackward0>)
记忆神经元数值是: tensor([[[-0.5589, -0.1285, -0.2860, -0.8364],
         [-0.5630, -0.1262, -0.3085, -0.8338]]], grad_fn=<StackBackward0>)
高级 API LSTM 输出： [0.34141046 0.34165925]
