In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import random
import torch.nn.functional as F


class SimpleMLP(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(SimpleMLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


# 1. 检查CUDA是否可用
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 2. 定义简单的多层感知机

seed = 3
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
random.seed(seed)


In [58]:
# 训练代码（不包括权重）
# 加载MNIST数据集
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=100, shuffle=True)

test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, transform=transform)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=100, shuffle=False)

# 4. 定义损失函数和优化器
model = SimpleMLP(784, 500, 10).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.005)

# 5. 训练网络
num_epochs = 5
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 100 == 0:
            print(
                f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

# 6. 测试网络的性能
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print(
        f'Accuracy of the model on the 10000 test images: {100 * correct / total} %')

# 7. 保存网络权重
torch.save(model.state_dict(), f'./{seed}_{100 * correct / total}%.pth')


Epoch [1/5], Step [100/600], Loss: 0.3016
Epoch [1/5], Step [200/600], Loss: 0.3982
Epoch [1/5], Step [300/600], Loss: 0.3141
Epoch [1/5], Step [400/600], Loss: 0.1220
Epoch [1/5], Step [500/600], Loss: 0.2182
Epoch [1/5], Step [600/600], Loss: 0.1779
Epoch [2/5], Step [100/600], Loss: 0.1662
Epoch [2/5], Step [200/600], Loss: 0.2573
Epoch [2/5], Step [300/600], Loss: 0.1640
Epoch [2/5], Step [400/600], Loss: 0.1269
Epoch [2/5], Step [500/600], Loss: 0.0724
Epoch [2/5], Step [600/600], Loss: 0.1557
Epoch [3/5], Step [100/600], Loss: 0.0964
Epoch [3/5], Step [200/600], Loss: 0.0728
Epoch [3/5], Step [300/600], Loss: 0.1012
Epoch [3/5], Step [400/600], Loss: 0.1234
Epoch [3/5], Step [500/600], Loss: 0.0685
Epoch [3/5], Step [600/600], Loss: 0.2103
Epoch [4/5], Step [100/600], Loss: 0.1878
Epoch [4/5], Step [200/600], Loss: 0.1601
Epoch [4/5], Step [300/600], Loss: 0.1913
Epoch [4/5], Step [400/600], Loss: 0.0148
Epoch [4/5], Step [500/600], Loss: 0.0510
Epoch [4/5], Step [600/600], Loss:

In [11]:
# 修改网络，将可学习权重加进网络当中
class CombinedMLP(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, state_dicts):
        super(CombinedMLP, self).__init__()
        num_weights = len(state_dicts)

        # 动态创建权重列表
        self.weights = nn.ParameterList(
            [nn.Parameter(torch.tensor(1.0)) for _ in range(num_weights)])

        self.pretrained_weights = state_dicts
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.view(x.size(0), -1)

        # 使用softmax进行归一化
        norm_weights = F.softmax(torch.stack([w for w in self.weights]), dim=0)

        combined_weight1 = sum([norm_weights[i] * self.pretrained_weights[i]
                               ['fc1.weight'] for i in range(len(self.weights))])
        combined_bias1 = sum([norm_weights[i] * self.pretrained_weights[i]
                             ['fc1.bias'] for i in range(len(self.weights))])
        x = F.linear(x, combined_weight1, combined_bias1)

        x = self.relu(x)

        combined_weight2 = sum([norm_weights[i] * self.pretrained_weights[i]
                               ['fc2.weight'] for i in range(len(self.weights))])
        combined_bias2 = sum([norm_weights[i] * self.pretrained_weights[i]
                             ['fc2.bias'] for i in range(len(self.weights))])
        x = F.linear(x, combined_weight2, combined_bias2)

        return x


In [56]:
import time
import numpy as np


# 加载并存储预训练模型的权重
state_dict1 = torch.load("1_96.13%.pth")
state_dict2 = torch.load("2_95.8%.pth")
state_dict3 = torch.load("3_95.94%.pth")
state_dict4 = torch.load("1_92.17%.pth")
state_dict5 = torch.load("2_92.05%.pth")
state_dict6 = torch.load("3_92.03%.pth")

# state_dict = [state_dict1, state_dict2,
#               state_dict4, state_dict5]

# state_dict = [state_dict1, state_dict2, state_dict3]
state_dict = [state_dict1, state_dict2, state_dict3,
              state_dict4, state_dict5, state_dict6]

# 测试模型性能模块
def test_model(model, test_loader):
    model.eval()

    # 推理并计算准确度
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Accuracy on the test set: {accuracy:.2f}%")
    return accuracy

best_accuracy = 0

# 从不同的随机种子中选出最好的权重
for seed in range(2, 3):
    print("SEED:",seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

    model = CombinedMLP(
        28*28, 500, 10, state_dict).to(device)

    epoch_times = []

    # 训练加权融合的模型
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    train_dataset = torchvision.datasets.MNIST(
        root='./data', train=True, transform=transform, download=True)
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=100, shuffle=True)
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    test_dataset = torchvision.datasets.MNIST(
        root='./data', train=False, transform=transform)
    test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset, batch_size=100, shuffle=False)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.weights.parameters(), lr=0.4)

    NUM_BATCHES = 1  # 指定要使用的batch数
    STEPS_PER_BATCH = 10  # 指定每个batch的迭代次数

    num_epochs = 1

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        total_loss = 0.0
        start_time = time.time()
        for i, data in enumerate(train_loader, 0):
            if i >= NUM_BATCHES:
                break

            # 打印直接merge不学习的推理结果
            # if i == 0:
            #     accuracy = test_model(model, test_loader)
            #     print('no learning accuracy:', accuracy)
            inputs, labels = data[0].to(device), data[1].to(device)

            for step in range(STEPS_PER_BATCH):
                outputs = model(inputs)

                _, pseudo_labels = torch.max(outputs.data, 1)
                loss = criterion(outputs, pseudo_labels)
                # loss = criterion(outputs, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                total_loss += loss.item()

                print(
                    f"Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{NUM_BATCHES}], Step [{step+1}/{STEPS_PER_BATCH}], Loss: {loss.item():.4f}")

            # if (i+1) % 300 == 0:
            #     print(
            #         f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
                # accuracy = test_model(model, test_loader)
                # print('accuracy:', accuracy)

        # 计算每个epoch的时间
        end_time = time.time()
        elapsed_time = end_time - start_time
        epoch_times.append(elapsed_time)

        # 打印每个epoch结束后的推理结果
        accuracy = test_model(model, test_loader)
        norm_weights = F.softmax(torch.stack([w for w in model.weights]), dim=0)
        print("Weights:", norm_weights)

        for i, weight in enumerate(model.weights):
            print(f"Weight {i+1}: {weight.item()}")

        # 打印每轮epoch的average loss
        average_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {average_loss:.4f}')




    accuracy = test_model(model, test_loader)
    print(f"Accuracy on the test set: {accuracy:.2f}%")
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_weights = norm_weights
        best_seed = seed

    mean_time = np.mean(epoch_times)
    var_time = np.var(epoch_times)

    print(f"Average epoch training time: {mean_time:.2f} seconds.")
    print(f"Variance of epoch training time: {var_time:.2f} seconds^2.")

# 保存最好权重的值
print(best_weights)
torch.save({"weights": best_weights.tolist()}, "weights.pth")


SEED: 2
Epoch [1/1], Batch [1/1], Step [1/10], Loss: 1.8425
Epoch [1/1], Batch [1/1], Step [2/10], Loss: 1.4905
Epoch [1/1], Batch [1/1], Step [3/10], Loss: 1.1052
Epoch [1/1], Batch [1/1], Step [4/10], Loss: 0.8402
Epoch [1/1], Batch [1/1], Step [5/10], Loss: 0.7009
Epoch [1/1], Batch [1/1], Step [6/10], Loss: 0.5673
Epoch [1/1], Batch [1/1], Step [7/10], Loss: 0.2969
Epoch [1/1], Batch [1/1], Step [8/10], Loss: 0.1197
Epoch [1/1], Batch [1/1], Step [9/10], Loss: 0.0718
Epoch [1/1], Batch [1/1], Step [10/10], Loss: 0.0550
Accuracy on the test set: 96.01%
Weights: tensor([9.4425e-01, 5.2033e-02, 1.0370e-03, 9.6040e-04, 8.4228e-04, 8.7900e-04],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Weight 1: 4.791115760803223
Weight 2: 1.8925962448120117
Weight 3: -2.0229787826538086
Weight 4: -2.099679946899414
Weight 5: -2.2309117317199707
Weight 6: -2.1882479190826416
Epoch [1/1], Average Loss: 0.0118
Accuracy on the test set: 96.01%
Accuracy on the test set: 96.01%
Average epoch traini

In [25]:
state_dict1 = torch.load("1_96.13%.pth")
state_dict2 = torch.load("2_95.8%.pth")
state_dict3 = torch.load("3_95.94%.pth")
state_dict4 = torch.load("1_92.17%.pth")
state_dict5 = torch.load("2_92.05%.pth")
state_dict6 = torch.load("3_92.03%.pth")

state_dict = [state_dict1, state_dict2, state_dict3,
              state_dict4, state_dict5, state_dict6]

model = CombinedMLP(
       28*28, 500, 10, state_dict).to(device)

specific_weights = [3.999155044555664,
                    0.18632793426513672, -1.046790599822998, 2.2161786556243896, -1.1982409954071045, -1.0864580869674683]

for i, weight_value in enumerate(specific_weights):
    model.weights[i].data = torch.tensor(weight_value).to(device)

accuracy = test_model(model, test_loader)
print(f"Accuracy on the test set: {accuracy:.2f}%")


Accuracy on the test set: 96.22%
Accuracy on the test set: 96.22%
