In [1]:
import paddle
import torch

device = "cpu"
paddle.set_device("gpu" if device == "cuda" else "cpu")

paddle.set_grad_enabled(False)

# Define the linear layer.
weight_attr = paddle.ParamAttr(
    name="weight",
    initializer=paddle.nn.initializer.Constant(value=0.5))
bias_attr = paddle.ParamAttr(
    name="bias",
    initializer=paddle.nn.initializer.Constant(value=1.0))
linear = paddle.nn.Linear(2, 4, weight_attr=weight_attr, bias_attr=bias_attr)
print(linear.weight)
print(linear.bias)
# linear.weight: [[0.5 0.5 0.5 0.5]
#                 [0.5 0.5 0.5 0.5]]
# linear.bias: [1. 1. 1. 1.]

x = paddle.randn((3, 2), dtype="float32")
print(x)
# x: [[-0.32342386 -1.200079  ]
#     [ 0.7979031  -0.90978354]
#     [ 0.40597573  1.8095392 ]]
y = linear(x)
print(y)
# y: [[0.23824859 0.23824859 0.23824859 0.23824859]
#     [0.9440598  0.9440598  0.9440598  0.9440598 ]
#     [2.1077576  2.1077576  2.1077576  2.1077576 ]]

Parameter containing:
Tensor(shape=[2, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
       [[0.50000000, 0.50000000, 0.50000000, 0.50000000],
        [0.50000000, 0.50000000, 0.50000000, 0.50000000]])
Parameter containing:
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=False,
       [1., 1., 1., 1.])
Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
       [[ 0.39658180, -0.69657004],
        [-0.33478394,  1.94147193],
        [-0.00060682,  0.55379170]])
Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
       [[0.85000587, 0.85000587, 0.85000587, 0.85000587],
        [1.80334401, 1.80334401, 1.80334401, 1.80334401],
        [1.27659249, 1.27659249, 1.27659249, 1.27659249]])


In [6]:
import paddle

device = "cuda"
paddle.set_device("gpu" if device == "cuda" else "cpu")
paddle.set_default_dtype("float64")
paddle.set_grad_enabled(False)

# Define the linear layer.
weight_attr = paddle.ParamAttr(
    name="weight",
    initializer=paddle.nn.initializer.Constant(value=0.5))
bias_attr = paddle.ParamAttr(
    name="bias",
    initializer=paddle.nn.initializer.Constant(value=1.0))
linear = paddle.nn.Linear(2, 4)
print(linear.weight)
print(linear.bias)
# linear.weight: [[0.5 0.5 0.5 0.5]
#                 [0.5 0.5 0.5 0.5]]
# linear.bias: [1. 1. 1. 1.]

x = paddle.randn((3, 2), dtype="float64")
print(x)
# x: [[-0.32342386 -1.200079  ]
#     [ 0.7979031  -0.90978354]
#     [ 0.40597573  1.8095392 ]]
y = linear(x)
print(y)
# y: [[0.23824859 0.23824859 0.23824859 0.23824859]
#     [0.9440598  0.9440598  0.9440598  0.9440598 ]
#     [2.1077576  2.1077576  2.1077576  2.1077576 ]]

Parameter containing:
Tensor(shape=[2, 4], dtype=float64, place=Place(gpu:0), stop_gradient=False,
       [[0., 0., 0., 0.],
        [0., 0., 0., 0.]])
Parameter containing:
Tensor(shape=[4], dtype=float64, place=Place(gpu:0), stop_gradient=False,
       [0., 0., 0., 0.])
Tensor(shape=[3, 2], dtype=float64, place=Place(gpu:0), stop_gradient=True,
       [[0., 0.],
        [0., 0.],
        [0., 0.]])
Tensor(shape=[3, 4], dtype=float64, place=Place(gpu:0), stop_gradient=False,
       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])


In [5]:
import paddle
import torch
import numpy as np
# 这是cpu环境下。修改成gpu再试试

device = "cuda"
paddle.set_device("gpu" if device == "cuda" else "cpu")

paddle.set_grad_enabled(False)
torch.set_grad_enabled(False)

# 定义模型
paddle_linear = paddle.nn.Linear(768, 768)
paddle_linear.eval()
paddle_linear.weight.set_value(paddle.randn(paddle_linear.weight.shape))
paddle_linear.bias.set_value(paddle.randn(paddle_linear.bias.shape))
# 定义模型
torch_linear = torch.nn.Linear(768, 768)
torch_linear.eval()
torch_linear.weight.data = torch.tensor(paddle_linear.weight.t().numpy())
torch_linear.bias.data = torch.tensor(paddle_linear.bias.numpy())
torch_linear.to(device)
# 定义输入
paddle_inputs = paddle.randn((32, 768)) * 10
torch_inputs = torch.tensor(paddle_inputs.numpy()).to(device)

# 前向输出
paddle_outputs = paddle_linear(paddle_inputs).numpy()
torch_outputs = torch_linear(torch_inputs).cpu().numpy()

print("使用forward方法。")
print("abs max:", np.abs(paddle_outputs-torch_outputs).max())
print("abs mean:", np.abs(paddle_outputs-torch_outputs).mean())
print("isclose:", np.all(np.isclose(paddle_outputs, torch_outputs, atol=0, rtol=1.e-6)))

# paddle matmul与torch matmul对比
print("paddle matmul与torch matmul对比")
paddle_shougong_outputs = (paddle.matmul(
    paddle_inputs, paddle_linear.weight) + paddle_linear.bias).numpy()
torch_shougong_outputs = (torch.matmul(
    torch_inputs, torch_linear.weight.t()) + torch_linear.bias).cpu().numpy()
print("abs max:", np.abs(paddle_shougong_outputs-torch_shougong_outputs).max())
print("abs mean:", np.abs(paddle_shougong_outputs-torch_shougong_outputs).mean())
print("isclose:", np.all(np.isclose(paddle_shougong_outputs,
      torch_shougong_outputs, atol=0, rtol=1.e-6)))


# paddle forward与paddle matmul对比
print("paddle forward与paddle matmul对比")
print("abs max:", np.abs(paddle_shougong_outputs-paddle_outputs).max())
print("abs mean:", np.abs(paddle_shougong_outputs-paddle_outputs).mean())
print("isclose:", np.all(np.isclose(
    paddle_shougong_outputs, paddle_outputs, atol=0, rtol=1.e-6)))


# paddle forward与torch matmul对比
print("paddle forward与torch matmul对比")
print("abs max:", np.abs(paddle_outputs-torch_shougong_outputs).max())
print("abs mean:", np.abs(paddle_outputs-torch_shougong_outputs).mean())
print("isclose:", np.all(np.isclose(paddle_outputs,
      torch_shougong_outputs, atol=0, rtol=1.e-6)))

# torch forward与torch matmul对比
print("torch forward与torch matmul对比")
print("abs max:", np.abs(torch_outputs-torch_shougong_outputs).max())
print("abs mean:", np.abs(torch_outputs-torch_shougong_outputs).mean())
print("isclose:", np.all(np.isclose(torch_outputs,
      torch_shougong_outputs, atol=0, rtol=1.e-6)))

# 输出
# cpu条件下
# 使用forward方法。
# abs max: 0.00012207031
# abs mean: 7.5253715e-06
# isclose: False
# paddle matmul与torch matmul对比
# abs max: 0.0
# abs mean: 0.0
# isclose: True
# paddle forward与paddle matmul对比
# abs max: 0.0
# abs mean: 0.0
# isclose: True
# paddle forward与torch matmul对比
# abs max: 0.0
# abs mean: 0.0
# isclose: True
# torch forward与torch matmul对比
# abs max: 0.00012207031
# abs mean: 7.5253715e-06
# isclose: False

# gpu条件下（RTX2060）
# 使用forward方法。
# abs max: 0.0
# abs mean: 0.0
# isclose: True
# paddle matmul与torch matmul对比
# abs max: 0.0
# abs mean: 0.0
# isclose: True
# paddle forward与paddle matmul对比
# abs max: 0.0
# abs mean: 0.0
# isclose: True
# paddle forward与torch matmul对比
# abs max: 0.0
# abs mean: 0.0
# isclose: True
# torch forward与torch matmul对比
# abs max: 0.0
# abs mean: 0.0
# isclose: True

使用forward方法。
abs max: 0.0
abs mean: 0.0
isclose: True
paddle matmul与torch matmul对比
abs max: 0.0
abs mean: 0.0
isclose: True
paddle forward与paddle matmul对比
abs max: 0.0
abs mean: 0.0
isclose: True
paddle forward与torch matmul对比
abs max: 0.0
abs mean: 0.0
isclose: True
torch forward与torch matmul对比
abs max: 0.0
abs mean: 0.0
isclose: True


In [6]:
import torch
import paddle

device = "cuda"
paddle.set_device("gpu" if device == "cuda" else "cpu")

paddle.set_grad_enabled(False)
torch.set_grad_enabled(False)

a = torch.randn(3)
b = torch.randn(3, 4)
print("a:", a)
print("b:", b)


c = paddle.randn((3, 1))
print("c:", c)

d = paddle.randn((3, 1), dtype="float64")
print("x:", d)
# x: [[-0.32342386 -1.200079  ]
#     [ 0.7979031  -0.90978354]
#     [ 0.40597573  1.8095392 ]]


a: tensor([1.2060, 0.2744, 0.5401])
b: tensor([[ 0.7321,  0.5893, -0.5360,  0.2156],
        [ 0.1993,  2.3670, -0.1326, -0.5050],
        [-0.4142, -0.5313, -0.9406,  0.2467]])
c: Tensor(shape=[3, 1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
       [[0.],
        [0.],
        [0.]])
x: Tensor(shape=[3, 1], dtype=float64, place=Place(gpu:0), stop_gradient=True,
       [[0.],
        [0.],
        [0.]])
