### 1. 实现并行模块（ParallelModule）

In [1]:
import torch
from torch import nn

In [2]:
class ParallelModule(nn.Module):
    """Two independent MLP branches fed the same input and averaged.

    ``net1`` maps 5 -> 3 -> 3 -> 2 and ``net2`` maps 5 -> 16 -> 2; every
    linear layer in both branches is followed by a ReLU. ``forward`` returns
    the element-wise mean of the two branch outputs.
    """

    def __init__(self):
        super().__init__()
        # Deeper, narrow branch: 5 -> 3 -> 3 -> 2.
        narrow_layers = [
            nn.Linear(5, 3), nn.ReLU(),
            nn.Linear(3, 3), nn.ReLU(),
            nn.Linear(3, 2), nn.ReLU(),
        ]
        # Shallower, wide branch: 5 -> 16 -> 2.
        wide_layers = [
            nn.Linear(5, 16), nn.ReLU(),
            nn.Linear(16, 2), nn.ReLU(),
        ]
        self.net1 = nn.Sequential(*narrow_layers)
        self.net2 = nn.Sequential(*wide_layers)

    def forward(self, x):
        """Run ``x`` through both branches and return the mean of their outputs."""
        out1 = self.net1(x)
        out2 = self.net2(x)
        return (out1 + out2) / 2
    

# Smoke test: push one random mini-batch through the module and inspect the result.
ParallelModule_re = ParallelModule()
X = torch.rand(2, 5)  # 2 samples, 5 features
parallel_out = ParallelModule_re(X)  # single forward pass, reused for both displays
print(parallel_out)
parallel_out.shape

tensor([[0.0000, 0.0290],
        [0.0000, 0.0000]], grad_fn=<DivBackward0>)


torch.Size([2, 2])

可见，`net1` 的结构为 `(input[5,3], hidden[3,3], output[3,2])`，`net2` 的结构为 `(input[5,16], output[16,2])`；在 `forward` 中对两个子网络的输出逐元素求平均（并非拼接），因此输出形状为 `[x,2]`，其中 `x` 为样本数。

### 2. 构建包含共享参数层的多层感知机

In [3]:
class SharedWeightNet(nn.Module):
    """Two-branch MLP in which both branches begin with the same linear layer.

    A single ``nn.Linear(5, 3)`` instance (``shared_linear``) is placed at
    index 0 of both ``net1`` and ``net2``, so the two branches use one
    weight/bias pair for that layer. ``forward`` returns the element-wise
    mean of the two branch outputs.
    """

    def __init__(self):
        super().__init__()

        # One layer object reused by both branches => a single set of parameters.
        self.shared_linear = nn.Linear(5, 3)

        # Branch 1: shared 5 -> 3, then 3 -> 3 -> 2, ReLU after every linear layer.
        branch1_layers = [
            self.shared_linear, nn.ReLU(),
            nn.Linear(3, 3), nn.ReLU(),
            nn.Linear(3, 2), nn.ReLU(),
        ]
        # Branch 2: shared 5 -> 3, then 3 -> 16 -> 2.
        # NOTE(review): unlike branch 1 there is no activation between the
        # shared layer and nn.Linear(3, 16) — confirm this is intentional.
        branch2_layers = [
            self.shared_linear,
            nn.Linear(3, 16), nn.ReLU(),
            nn.Linear(16, 2), nn.ReLU(),
        ]
        self.net1 = nn.Sequential(*branch1_layers)
        self.net2 = nn.Sequential(*branch2_layers)

    def forward(self, x):
        """Run ``x`` through both branches and return the mean of their outputs."""
        out1 = self.net1(x)
        out2 = self.net2(x)
        return (out1 + out2) / 2

# Smoke test for the model defined in this cell. This must instantiate
# SharedWeightNet: building ParallelModule here would only repeat the check
# from section 1 and leave the shared-weight forward pass unverified.
SharedWeightNet_re = SharedWeightNet()
X = torch.rand(2, 5)  # 2 samples, 5 features
shared_out = SharedWeightNet_re(X)  # single forward pass, reused for both displays
print(shared_out)
shared_out.shape

tensor([[0.0000, 0.2077],
        [0.0000, 0.2764]], grad_fn=<DivBackward0>)


torch.Size([2, 2])

In [4]:

# Minimal training run on random data, used to observe how the shared
# layer's weight and gradient evolve over a few SGD steps.
model = SharedWeightNet()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Random inputs and regression targets: 2 samples, 5 features -> 2 outputs.
X = torch.rand(2, 5)
y_true = torch.rand(2, 2)

# The parameter objects never change identity (the optimizer updates them
# in place), so they can be looked up once outside the loop.
shared_params = list(model.shared_linear.parameters())
shared_weight = shared_params[0]

for epoch in range(3):
    optimizer.zero_grad()

    y_pred = model(X)
    loss = criterion(y_pred, y_true)
    loss.backward()

    # Report the weight *before* this step's update, together with the
    # gradient just computed by backward(); only the first two rows are shown.
    weight_preview = shared_weight.data[:2]
    if shared_weight.grad is not None:
        grad_preview = shared_weight.grad[:2]
    else:
        grad_preview = 'No grad'
    print(f"\nEpoch {epoch+1}")
    print(f"Shared Layer Weight: {weight_preview}")
    print(f"Shared Layer Grad: {grad_preview}")

    optimizer.step()

  from .autonotebook import tqdm as notebook_tqdm



Epoch 1
Shared Layer Weight: tensor([[ 0.1729,  0.3095, -0.2705,  0.3356, -0.1239],
        [-0.3827, -0.0933, -0.2389, -0.0820, -0.0832]])
Shared Layer Grad: tensor([[-0.0042, -0.0070, -0.0058, -0.0051, -0.0054],
        [ 0.0049,  0.0135,  0.0078,  0.0106,  0.0082]])

Epoch 2
Shared Layer Weight: tensor([[ 0.1729,  0.3095, -0.2705,  0.3356, -0.1239],
        [-0.3828, -0.0934, -0.2390, -0.0821, -0.0833]])
Shared Layer Grad: tensor([[-0.0042, -0.0069, -0.0058, -0.0051, -0.0054],
        [ 0.0048,  0.0132,  0.0077,  0.0104,  0.0081]])

Epoch 3
Shared Layer Weight: tensor([[ 0.1730,  0.3096, -0.2704,  0.3357, -0.1238],
        [-0.3828, -0.0936, -0.2390, -0.0822, -0.0834]])
Shared Layer Grad: tensor([[-0.0042, -0.0069, -0.0058, -0.0051, -0.0054],
        [ 0.0048,  0.0130,  0.0076,  0.0103,  0.0080]])
