In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

class SharedLayer(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SharedLayer, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.linear(x)

class MLPWithSharedLayer(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLPWithSharedLayer, self).__init__()
        self.shared_layer = SharedLayer(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = torch.relu(self.shared_layer(x))
        x = torch.relu(self.shared_layer(x))
        x = self.fc2(x)
        return x


In [6]:
# 初始化模型、损失函数和优化器
input_dim = 5
hidden_dim = 5
output_dim = 1

model = MLPWithSharedLayer(input_dim, hidden_dim, output_dim)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)


In [7]:
# 生成一些假数据
batch_size = 8
x = torch.randn(batch_size, input_dim)
y = torch.randn(batch_size, output_dim)

# 训练模型
num_epochs = 5
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(x)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

    print(f'Epoch {epoch+1}/{num_epochs}')
    print('Shared Layer Weights:', model.shared_layer.linear.weight.data)
    print('Shared Layer Gradients:', model.shared_layer.linear.weight.grad)
    print('FC2 Layer Weights:', model.fc2.weight.data)
    print('FC2 Layer Gradients:', model.fc2.weight.grad)


Epoch 1/5
Shared Layer Weights: tensor([[-0.1088,  0.0437, -0.3625, -0.0717, -0.3809],
        [ 0.2485,  0.1476, -0.2140, -0.4009, -0.1683],
        [ 0.1301, -0.1307, -0.1414, -0.2080,  0.3125],
        [ 0.4131, -0.1830, -0.2928,  0.4037,  0.1550],
        [ 0.0461, -0.0243, -0.3075,  0.2651,  0.3319]])
Shared Layer Gradients: tensor([[ 0.1532,  0.1771, -0.0042, -0.0602,  0.0055],
        [ 0.0171,  0.0438,  0.0080, -0.0446,  0.0012],
        [ 0.1018,  0.1185,  0.0362,  0.0217,  0.0201],
        [-0.0369, -0.1619,  0.0550,  0.0242, -0.0498],
        [-0.0071,  0.0245, -0.0069,  0.0100,  0.0151]])
FC2 Layer Weights: tensor([[-0.3894, -0.0975, -0.1882,  0.0641,  0.3510]])
FC2 Layer Gradients: tensor([[ 0.0127, -0.2844, -0.0861, -0.0972,  0.0000]])
Epoch 2/5
Shared Layer Weights: tensor([[-0.1104,  0.0420, -0.3624, -0.0712, -0.3810],
        [ 0.2484,  0.1472, -0.2141, -0.4005, -0.1683],
        [ 0.1291, -0.1319, -0.1417, -0.2082,  0.3123],
        [ 0.4135, -0.1814, -0.2934,  0.4035

In [12]:
import torch
from d2l import torch as d2l
nums = 10000
x_cpu = torch.ones(nums,nums)
x_gpu = torch.ones(nums,nums, device='cuda')

timer1 = d2l.Timer()
torch.mm(x_cpu,x_cpu)
print("cpu cost time {:.5f}".format(float(timer1.stop())))

timer2 = d2l.Timer()
torch.mm(x_gpu,x_gpu)
print("gpu cost time {:.5f}".format(float(timer2.stop())))

cpu cost time 22.66202
gpu cost time 0.30718
