In [57]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import random


class SimpleMLP(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(SimpleMLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


# 1. 检查CUDA是否可用
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 2. 定义简单的多层感知机

seed = 3
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
random.seed(seed)


In [58]:
# 3. 加载MNIST数据集
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=100, shuffle=True)

test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, transform=transform)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=100, shuffle=False)

# 4. 定义损失函数和优化器
model = SimpleMLP(784, 500, 10).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.005)

# 5. 训练网络
num_epochs = 5
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 100 == 0:
            print(
                f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

# 6. 测试网络的性能
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print(
        f'Accuracy of the model on the 10000 test images: {100 * correct / total} %')

# 7. 保存网络权重
torch.save(model.state_dict(), f'./{seed}_{100 * correct / total}%.pth')


Epoch [1/5], Step [100/600], Loss: 0.3016
Epoch [1/5], Step [200/600], Loss: 0.3982
Epoch [1/5], Step [300/600], Loss: 0.3141
Epoch [1/5], Step [400/600], Loss: 0.1220
Epoch [1/5], Step [500/600], Loss: 0.2182
Epoch [1/5], Step [600/600], Loss: 0.1779
Epoch [2/5], Step [100/600], Loss: 0.1662
Epoch [2/5], Step [200/600], Loss: 0.2573
Epoch [2/5], Step [300/600], Loss: 0.1640
Epoch [2/5], Step [400/600], Loss: 0.1269
Epoch [2/5], Step [500/600], Loss: 0.0724
Epoch [2/5], Step [600/600], Loss: 0.1557
Epoch [3/5], Step [100/600], Loss: 0.0964
Epoch [3/5], Step [200/600], Loss: 0.0728
Epoch [3/5], Step [300/600], Loss: 0.1012
Epoch [3/5], Step [400/600], Loss: 0.1234
Epoch [3/5], Step [500/600], Loss: 0.0685
Epoch [3/5], Step [600/600], Loss: 0.2103
Epoch [4/5], Step [100/600], Loss: 0.1878
Epoch [4/5], Step [200/600], Loss: 0.1601
Epoch [4/5], Step [300/600], Loss: 0.1913
Epoch [4/5], Step [400/600], Loss: 0.0148
Epoch [4/5], Step [500/600], Loss: 0.0510
Epoch [4/5], Step [600/600], Loss:

In [8]:
torch.cuda.is_available()


True

In [95]:
class CombinedMLP(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, state_dicts):
        super(CombinedMLP, self).__init__()
        self.alpha = nn.Parameter(torch.tensor([0.0]))
        self.beta = nn.Parameter(torch.tensor(torch.log(torch.tensor([2.0]))))
        self.gamma = nn.Parameter(torch.tensor([0.0]))

        self.pretrained_weights = state_dicts
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.view(x.size(0), -1)

        # 归一化权重
        norm_weights = F.softmax(torch.stack(
            [self.alpha, self.beta, self.gamma]), dim=0)

        combined_weight1 = norm_weights[0] * self.pretrained_weights[0]['fc1.weight'] + norm_weights[1] * \
            self.pretrained_weights[1]['fc1.weight'] + \
            norm_weights[2] * self.pretrained_weights[2]['fc1.weight']
        combined_bias1 = norm_weights[0] * self.pretrained_weights[0]['fc1.bias'] + norm_weights[1] * \
            self.pretrained_weights[1]['fc1.bias'] + \
            norm_weights[2] * self.pretrained_weights[2]['fc1.bias']
        x = F.linear(x, combined_weight1, combined_bias1)

        x = self.relu(x)

        combined_weight2 = norm_weights[0] * self.pretrained_weights[0]['fc2.weight'] + norm_weights[1] * \
            self.pretrained_weights[1]['fc2.weight'] + \
            norm_weights[2] * self.pretrained_weights[2]['fc2.weight']
        combined_bias2 = norm_weights[0] * self.pretrained_weights[0]['fc2.bias'] + norm_weights[1] * \
            self.pretrained_weights[1]['fc2.bias'] + \
            norm_weights[2] * self.pretrained_weights[2]['fc2.bias']
        x = F.linear(x, combined_weight2, combined_bias2)

        return x


In [96]:
# 加载并存储预训练模型的权重
state_dict1 = torch.load("1_96.13%.pth")
state_dict2 = torch.load("2_95.8%.pth")
state_dict3 = torch.load("3_95.94%.pth")


def test_model(model, test_loader):
    model.eval()

    # 推理并计算准确度
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Accuracy on the test set: {accuracy:.2f}%")
    return accuracy

best_accuracy = 0

# 从不同的随机种子中选出最好的权重
for seed in range(1, 2):
    print("SEED:",seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

    model = CombinedMLP(
        28*28, 500, 10, [state_dict1, state_dict2, state_dict3]).to(device)


    # 训练加权融合的模型
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    train_dataset = torchvision.datasets.MNIST(
        root='./data', train=True, transform=transform, download=True)
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=100, shuffle=True)
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    test_dataset = torchvision.datasets.MNIST(
        root='./data', train=False, transform=transform)
    test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset, batch_size=100, shuffle=False)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam([model.alpha, model.beta, model.gamma], lr=0.06)

    num_epochs = 5

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        for i, data in enumerate(train_loader, 0):
            # if i == 0:
            #     accuracy = test_model(model, test_loader)
            #     print('accuracy:', accuracy)
            inputs, labels = data[0].to(device), data[1].to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            # print(alpha.grad, beta.grad, gamma.grad)
            # print(weights[0].grad, weights[1].grad, weights[2].grad)
            optimizer.step()
            # print("alpha:", model.alpha, "beta:",
            #       model.beta, "gamma:", model.gamma)

            if (i+1) % 300 == 0:
                print(
                    f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
                accuracy = test_model(model, test_loader)
                print('accuracy:', accuracy)

        accuracy = test_model(model, test_loader)
        norm_weights = F.softmax(torch.stack(
            [model.alpha, model.beta, model.gamma]), dim=0)
        print("alpha:", norm_weights[0], "beta:",
              norm_weights[1], "gamma:", norm_weights[2])


    accuracy = test_model(model, test_loader)
    print(f"Accuracy on the test set: {accuracy:.2f}%")
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_alpha = norm_weights[0]
        best_beta = norm_weights[1]
        best_gamma = norm_weights[2]
        best_seed = seed

# 保存alpha和beta的值
print(best_alpha.item(), best_beta.item(), best_gamma.item(), best_seed)
torch.save({"alpha": best_alpha.item(), "beta": best_beta.item(),
           "gamma": best_gamma.item()}, "weights.pth")


SEED: 1


  """


Epoch [1/5], Step [300/600], Loss: 0.2149
Accuracy on the test set: 95.53%
accuracy: 95.53
Epoch [1/5], Step [600/600], Loss: 0.1736
Accuracy on the test set: 95.61%
accuracy: 95.61
Accuracy on the test set: 95.61%
alpha: tensor([0.0192], device='cuda:0', grad_fn=<SelectBackward0>) beta: tensor([0.9662], device='cuda:0', grad_fn=<SelectBackward0>) gamma: tensor([0.0146], device='cuda:0', grad_fn=<SelectBackward0>)
Epoch [2/5], Step [300/600], Loss: 0.2445
Accuracy on the test set: 95.63%
accuracy: 95.63
Epoch [2/5], Step [600/600], Loss: 0.1173
Accuracy on the test set: 95.60%
accuracy: 95.6
Accuracy on the test set: 95.60%
alpha: tensor([0.0270], device='cuda:0', grad_fn=<SelectBackward0>) beta: tensor([0.9618], device='cuda:0', grad_fn=<SelectBackward0>) gamma: tensor([0.0112], device='cuda:0', grad_fn=<SelectBackward0>)
Epoch [3/5], Step [300/600], Loss: 0.1603
Accuracy on the test set: 95.62%
accuracy: 95.62
Epoch [3/5], Step [600/600], Loss: 0.2002
Accuracy on the test set: 95.64%

In [21]:
loaded_weights = torch.load("weights.pth")
loaded_alpha = loaded_weights["alpha"]
loaded_beta = loaded_weights["beta"]
print("alpha:",loaded_alpha, "beta:",loaded_beta)

alpha: 0.17560148239135742 beta: -0.6090418100357056


In [26]:
seed = best_seed
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
random.seed(seed)

model1 = SimpleMLP(784, 500, 10).to(device)
model2 = SimpleMLP(784, 500, 10).to(device)
model3 = SimpleMLP(784, 500, 10).to(device)
model1.load_state_dict(torch.load("1_92.17%.pth"))
model2.load_state_dict(torch.load("2_92.05%.pth"))
model3.load_state_dict(torch.load("3_92.03%.pth"))
loaded_weights = torch.load("weights.pth")
loaded_alpha = loaded_weights["alpha"]
loaded_beta = loaded_weights["beta"]
loaded_gamma = loaded_weights["gamma"]
# loaded_alpha = torch.tensor([1.0], requires_grad=True).to(device)
# loaded_beta = torch.tensor([0.0], requires_grad=True).to(device)
print("alpha:", loaded_alpha, "beta:", loaded_beta, "gamma:", loaded_gamma)

transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, transform=transform)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=100, shuffle=False)


# 加载权重
# model = SimpleMLP(784, 500, 10)
# model.load_state_dict(torch.load('11_92.05%.pth'))

# fusion_model = SimpleMLP(784, 500, 10).to(device)
# weighted_sum_parameters(fusion_model, model1, model2,
#                         model3, loaded_alpha, loaded_beta)
# model = fusion_model


model1.eval()
model2.eval()
model3.eval()

# 推理并计算准确度
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs1 = model1(images)
        outputs2 = model2(images)
        outputs3 = model3(images)

        weights = torch.softmax(torch.tensor(
            [loaded_alpha, loaded_beta, loaded_gamma], device=device), dim=0)

        # 计算加权和的输出
        outputs = weights[0] * outputs1 + weights[1] * \
            outputs2 + weights[2] * outputs3
        # outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy on the test set: {accuracy:.2f}%")


alpha: 0.8721233606338501 beta: 1.0146440267562866 gamma: 0.8551493287086487
Accuracy on the test set: 92.10%


In [30]:
def test_model(model, test_loader):
    model.eval()

    # 推理并计算准确度
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Accuracy on the test set: {accuracy:.2f}%")


In [None]:
state_dict1 = torch.load("1_92.17%.pth")
state_dict2 = torch.load("2_92.05%.pth")
state_dict3 = torch.load("3_92.03%.pth")


In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# 定义网络结构


class CombinedMLP(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, state_dicts):
        super(CombinedMLP, self).__init__()
        self.alpha = nn.Parameter(torch.randn(1))
        self.beta = nn.Parameter(torch.randn(1))
        self.gamma = nn.Parameter(torch.randn(1))

        self.pretrained_weights = state_dicts
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.view(x.size(0), -1)

        # 归一化权重
        total = self.alpha + self.beta + self.gamma
        norm_alpha = self.alpha / total
        norm_beta = self.beta / total
        norm_gamma = self.gamma / total

        combined_weight1 = norm_alpha * self.pretrained_weights[0]['fc1.weight'] + norm_beta * \
            self.pretrained_weights[1]['fc1.weight'] + \
            norm_gamma * self.pretrained_weights[2]['fc1.weight']
        combined_bias1 = norm_alpha * self.pretrained_weights[0]['fc1.bias'] + norm_beta * \
            self.pretrained_weights[1]['fc1.bias'] + \
            norm_gamma * self.pretrained_weights[2]['fc1.bias']
        x = F.linear(x, combined_weight1, combined_bias1)

        x = self.relu(x)

        combined_weight2 = norm_alpha * self.pretrained_weights[0]['fc2.weight'] + norm_beta * \
            self.pretrained_weights[1]['fc2.weight'] + \
            norm_gamma * self.pretrained_weights[2]['fc2.weight']
        combined_bias2 = norm_alpha * self.pretrained_weights[0]['fc2.bias'] + norm_beta * \
            self.pretrained_weights[1]['fc2.bias'] + \
            norm_gamma * self.pretrained_weights[2]['fc2.bias']
        x = F.linear(x, combined_weight2, combined_bias2)

        return x


# 设备配置
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 加载预训练的state_dicts
# 注意: 这里仅为示例，您需要加载您自己的state_dicts
state_dict1 = torch.load("1_92.17%.pth")
state_dict2 = torch.load("2_92.05%.pth")
state_dict3 = torch.load("3_92.03%.pth")

# 实例化模型
model = CombinedMLP(28*28, 500, 10, [state_dict1, state_dict2, state_dict3])
model = model.to(device)

# 创建优化器
optimizer = optim.Adam([model.alpha, model.beta, model.gamma], lr=0.005)

# 定义损失函数
criterion = nn.CrossEntropyLoss()


def custom_loss(outputs, labels, alpha, beta, gamma, lam=1e-5):
    classification_loss = F.cross_entropy(outputs, labels)
    reg_loss = lam * (alpha**2 + beta**2 + gamma**2)
    total_loss = classification_loss + reg_loss
    return total_loss


# 加载MNIST数据集
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=100, shuffle=True)

# 训练模型
num_epochs = 5
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # 前向传播
        outputs = model(images)
        loss = criterion(outputs, labels)


        # 反向传播
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print("model.alpha:", model.alpha.grad, "model.beta:",
              model.beta.grad, "model.gamma:", model.gamma.grad)

        if (i+1) % 100 == 0:
            print(
                f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')


model.alpha: tensor([-0.5917], device='cuda:0') model.beta: tensor([0.2191], device='cuda:0') model.gamma: tensor([-0.5163], device='cuda:0')
model.alpha: tensor([-0.6746], device='cuda:0') model.beta: tensor([0.2480], device='cuda:0') model.gamma: tensor([-0.6167], device='cuda:0')
model.alpha: tensor([-0.5878], device='cuda:0') model.beta: tensor([0.2156], device='cuda:0') model.gamma: tensor([-0.5697], device='cuda:0')
model.alpha: tensor([-0.5946], device='cuda:0') model.beta: tensor([0.2112], device='cuda:0') model.gamma: tensor([-0.5686], device='cuda:0')
model.alpha: tensor([-0.4245], device='cuda:0') model.beta: tensor([0.1469], device='cuda:0') model.gamma: tensor([-0.4069], device='cuda:0')
model.alpha: tensor([-0.5984], device='cuda:0') model.beta: tensor([0.2020], device='cuda:0') model.gamma: tensor([-0.5757], device='cuda:0')
model.alpha: tensor([-0.5279], device='cuda:0') model.beta: tensor([0.1770], device='cuda:0') model.gamma: tensor([-0.5353], device='cuda:0')
model.

KeyboardInterrupt: 