In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
import numpy as np

In [12]:
# 加载数据集
data = load_breast_cancer()
X = data.data
y = data.target

# 数据标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 数据集划分
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# 将数据转换为 PyTorch 张量
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [18]:
# # 构建简单的二分类模型
# class SimpleNN(nn.Module):
#     def __init__(self, input_dim):
#         super(SimpleNN, self).__init__()
#         self.fc1 = nn.Linear(input_dim, 64)
#         self.fc2 = nn.Linear(64, 32)
#         self.fc3 = nn.Linear(32, 2)  # 输出2个类别
        
#     def forward(self, x):
#         x = torch.relu(self.fc1(x))
#         x = torch.relu(self.fc2(x))
#         x = self.fc3(x)
#         return x

In [50]:
# Mixup 数据增强实现
def mixup_data(x, y, alpha=1.0):
    """ Mixup 数据增强：生成新的特征和标签 """
    lam = torch.rand(1).item() * alpha  # 随机生成 lambda 值
    batch_size = x.size(0)
    index = torch.randperm(batch_size).to(x.device)  # 随机打乱索引
    
    mixed_x = lam * x + (1 - lam) * x[index, :]
    mixed_y = lam * y + (1 - lam) * y[index]  # 标签也进行加权插值
    
    return mixed_x, mixed_y, lam

# 构建简单的二分类模型
class SimpleNN(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 2)  # 输出2个类别，注意是二分类问题

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [57]:
# 初始化模型, 定义损失函数和优化器
model = SimpleNN(X_train.shape[1])  # 输入维度为特征数量
criterion = nn.CrossEntropyLoss()  # 使用交叉熵损失函数
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam优化器

In [58]:
# 训练模型
num_epochs = 30
for epoch in range(num_epochs):
    model.train()

    # 使用 Mixup 增强训练数据
    mixed_X, mixed_y, lam = mixup_data(X_train_tensor, y_train_tensor, alpha=1.0)
    
    # 前向传播
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    
    # 反向传播
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/30], Loss: 0.5355
Epoch [20/30], Loss: 0.3792
Epoch [30/30], Loss: 0.2394


In [59]:
# 评估模型准确率
model.eval()  # 设置模型为评估模式
with torch.no_grad():
    # 预测
    outputs = model(X_test_tensor)
    _, predicted = torch.max(outputs, 1)
    
    # 计算准确率
    accuracy = metrics.accuracy_score(y_test, predicted.numpy())
    print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 97.37%
