In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Build the MLP model
class MLP(nn.Module):
    """Single-hidden-layer perceptron: linear -> ReLU -> linear.

    Args:
        input_size: Number of features expected in each input sample.
        output_size: Number of values produced for each sample.
        hidden_size: Width of the hidden layer.
    """

    def __init__(self, input_size, output_size, hidden_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the second linear layer (no final activation)."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Build the ensemble model
class Ensemble(nn.Module):
    """Ensemble of ``n_models`` separately constructed MLPs whose outputs are averaged.

    Args:
        n_models: Number of MLP members to create.
        input_size: Input feature count passed to every member.
        output_size: Output size passed to every member.
        hidden_size: Hidden-layer width passed to every member.
    """

    def __init__(self, n_models, input_size, output_size, hidden_size):
        super().__init__()
        self.n_models = n_models
        self.hidden_size = hidden_size
        # ModuleList registers every member so their parameters are visible
        # through ``self.parameters()``.
        self.models = nn.ModuleList(
            [MLP(input_size, output_size, hidden_size) for _ in range(n_models)]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the element-wise mean of all member outputs for ``x``."""
        member_outputs = [member(x) for member in self.models]
        # Stack along a new leading axis, then average that axis away.
        return torch.stack(member_outputs).mean(dim=0)

# Define the dataset
class CustomDataset(Dataset):
    """Map-style dataset that pairs ``X[idx]`` with ``y[idx]``.

    Args:
        X: Indexable container of features.
        y: Indexable container of labels; its length defines the dataset size.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        # The label container determines how many samples are exposed.
        return len(self.y)

    def __getitem__(self, idx):
        features = self.X[idx]
        label = self.y[idx]
        return features, label

# Define the training function
def train(model, dataloader, criterion, optimizer) -> float:
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss
            tensor (treated here as a per-batch mean).
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        The batch losses averaged with each batch weighted by its size.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    n_samples = 0
    for inputs, labels in dataloader:
        batch_size = inputs.size(0)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the mean.
        running_loss += loss.item() * batch_size
        n_samples += batch_size
    # Normalise by the samples actually processed rather than the dataset
    # length: the two differ when the loader skips samples (e.g. drop_last),
    # and this also works for iterables that have no ``.dataset`` attribute.
    return running_loss / n_samples

# Define the evaluation function
def test(model, dataloader):
    """Collect sigmoid-activated predictions and labels over ``dataloader``.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(y_pred, y_true)``: the sigmoid of the model outputs and the
        labels, each concatenated along dimension 0 in iteration order. Both
        are empty float32 tensors when ``dataloader`` yields nothing.
    """
    model.eval()
    # Each list starts with an empty float32 tensor so the final ``torch.cat``
    # still has something to return when the loader yields no batches.
    pred_chunks = [torch.tensor([], dtype=torch.float32)]
    true_chunks = [torch.tensor([], dtype=torch.float32)]
    with torch.no_grad():
        for inputs, labels in dataloader:
            pred_chunks.append(torch.sigmoid(model(inputs)))
            true_chunks.append(labels)
        # Concatenate once at the end: calling torch.cat inside the loop
        # re-copies every previously accumulated row on each batch.
        y_pred = torch.cat(pred_chunks, dim=0)
        y_true = torch.cat(true_chunks, dim=0)
    return y_pred, y_true


# The name matches pytest's default ``test*`` collection pattern; opt out so
# importing this helper into a test module does not register it as a test.
test.__test__ = False

# Generate the dataset
n_features = 20
X, y = make_classification(n_samples=10000, n_features=n_features, n_informative=10, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Labels are reshaped to (N, 1) so they match the (batch, 1) logits the model
# emits with output_size = 1; BCEWithLogitsLoss rejects mismatched shapes.
train_dataset = CustomDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float().unsqueeze(1))
test_dataset = CustomDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float().unsqueeze(1))
# The tensors are already in memory, so batches are loaded in the main process
# (num_workers=0). Worker processes would only add start-up cost on every epoch
# and require an ``if __name__ == "__main__"`` guard on spawn-based platforms.
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=0)

# Hyperparameters
input_size = n_features
output_size = 1
hidden_size = 20
lr = 0.01
n_epochs = 50
n_models = 5

# Initialise the ensemble model, optimizer and loss
ensemble = Ensemble(n_models, input_size, output_size, hidden_size)
# A single optimizer over ensemble.parameters() updates all members together.
optimizer = optim.Adam(ensemble.parameters(), lr=lr)
criterion = nn.BCEWithLogitsLoss()

# Train the ensemble, evaluating on the test split after every epoch
for epoch in range(n_epochs):
    train_loss = train(ensemble, train_dataloader, criterion, optimizer)
    y_pred, y_true = test(ensemble, test_dataloader)
    # Flatten both tensors so accuracy_score receives plain 1-D label arrays;
    # rounding the sigmoid outputs turns probabilities into 0/1 predictions.
    test_acc = accuracy_score(y_true.reshape(-1).numpy(), torch.round(y_pred).reshape(-1).numpy())
    print(f'Epoch {epoch + 1}/{n_epochs}  Train Loss: {train_loss:.4f}  Test Accuracy: {test_acc:.4f}')