# view() 方法测试

代码段 1

```python
y = torch.matmul(attention_weights, V)  # [batch_size * seq_len, num_heads, d_v]
y = y.view(-1, self.num_heads * self.d_v)  # [batch_size * seq_len, d_v * num_heads]
y = self.W(y)  # 全连接层输入维度为 d_v*num_heads → d_model
y = y.view(-1, x.size(1), self.d_model)  # 恢复为 [batch_size, seq_len, d_model]
```

代码段 2

```python
y = torch.matmul(attention_weights, V)  # [batch_size * seq_len, num_heads, d_v]
y = y.view(-1, seq_len, self.num_heads * self.d_v)  # [batch_size, seq_len, d_v*num_heads]
y = self.W(y)  # 全连接层输入维度为 d_v*num_heads → d_model
```
这两段代码的输出形状和数值是否一致？

In [2]:
import torch
import torch.nn as nn

# Hyperparameters for the toy attention example.
batch_size, seq_len = 2, 5
num_heads, d_v, d_model = 4, 8, 32

# Seed before building the layer so both code paths share identical weights
# (keeps the comparison fair and reproducible).
torch.manual_seed(42)
W = nn.Linear(in_features=num_heads * d_v, out_features=d_model)

# Stand-in inputs: pretend attention_weights and V were already computed.
# The leading axis is the flattened batch_size * seq_len dimension.
attention_weights = torch.randn((batch_size * seq_len, num_heads, num_heads))
V = torch.randn((batch_size * seq_len, num_heads, d_v))

# 第一段代码
def code1():
    """Variant 1: project on a flat 2-D layout, then restore the sequence axis.

    Reads the module-level ``attention_weights``, ``V`` and ``W``.

    Returns:
        Tensor viewed as ``(-1, seq_len, d_model)``.
    """
    per_head = attention_weights @ V
    # Merge the head and value axes so each row is one token's features.
    flat_tokens = per_head.view(-1, num_heads * d_v)
    projected = W(flat_tokens)
    # Split the flattened leading axis back into (batch, seq_len).
    return projected.view(-1, seq_len, d_model)

# 第二段代码
def code2():
    """Variant 2: reshape straight to 3-D and let the linear layer act on the last axis.

    Reads the module-level ``attention_weights``, ``V`` and ``W``.

    Returns:
        Output of ``W`` applied to a ``(-1, seq_len, num_heads * d_v)`` view.
    """
    # Merge heads and recover the sequence axis in a single view.
    merged = (attention_weights @ V).view(-1, seq_len, num_heads * d_v)
    return W(merged)

# Run both variants.
output1, output2 = code1(), code2()

# Report whether the shapes match and whether the values agree within 1e-6.
shapes_match = output1.shape == output2.shape
print("形状是否相同:", shapes_match)  # expected: True
values_match = torch.allclose(output1, output2, atol=1e-6)
print("数值差异是否接近零:", values_match)  # expected: True

形状是否相同: True
数值差异是否接近零: True


In [5]:
import numpy as np
import torch

x = np.random.rand(2, 3, 4)
# Reverse along axis 0; like np.flip, this is a view onto x, not a copy.
y = x[::-1]
# Materialise the reversed view into its own buffer before handing it to
# torch (NOTE: torch.from_numpy is documented to reject negatively-strided
# arrays, which is presumably why the copy is taken here).
y_copy = y.copy()
torch.from_numpy(y_copy)

tensor([[[0.7882, 0.3674, 0.0208, 0.4562],
         [0.8598, 0.1526, 0.0218, 0.6150],
         [0.7079, 0.4024, 0.5637, 0.2014]],

        [[0.5853, 0.0600, 0.5856, 0.4186],
         [0.6840, 0.4798, 0.1205, 0.2950],
         [0.7061, 0.8781, 0.9906, 0.3617]]], dtype=torch.float64)