In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import CosineAnnealingLR

# 超参数配置
config = {
    "batch_size": 512,         # 大batch size加速训练[6](@ref)
    "base_lr": 0.1,            # 初始学习率[6](@ref)
    "weight_decay": 5e-4,      # L2正则化[6](@ref)
    "momentum": 0.9,           # Nesterov动量[6](@ref)
    "epochs": 200,             
    "cutout": True,            # 高级数据增强[5](@ref)
    "label_smoothing": 0.1,    # 标签平滑[4](@ref)
    "mixup_alpha": 0.2         # MixUp增强[4](@ref)
}

# 设备配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 数据增强策略（整合网页[1,5,8](@ref)）
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4)], p=0.8),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3))  # Cutout增强[5](@ref)
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# 加载数据集（网页[2,5](@ref)）
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)

train_loader = DataLoader(trainset, batch_size=config["batch_size"], shuffle=True, num_workers=4, pin_memory=True)
test_loader = DataLoader(testset, batch_size=config["batch_size"], shuffle=False, num_workers=4, pin_memory=True)

# 模型定义（基于网页[6,7](@ref)的预训练策略改进）
class CifarResNet(nn.Module):
    def __init__(self):
        super().__init__()
        # 加载预训练ResNet-50并修改输入层[7](@ref)
        self.base_model = torchvision.models.resnet50(pretrained=True)
        self.base_model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)  # 适配32x32输入[5](@ref)
        self.base_model.maxpool = nn.Identity()  # 移除原最大池化层[5](@ref)
        
        # 修改全连接层[6](@ref)
        in_features = self.base_model.fc.in_features
        self.base_model.fc = nn.Linear(in_features, 10)

    def forward(self, x):
        return self.base_model(x)

model = CifarResNet().to(device)

# 混合样本增强函数（网页[4](@ref)）
def mixup_data(x, y, alpha=1.0):
    if alpha > 0:
        lam = torch.distributions.beta.Beta(alpha, alpha).sample().item()
    else:
        lam = 1.0
    
    batch_size = x.size(0)
    index = torch.randperm(batch_size).to(device)
    
    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

# 损失函数（带标签平滑）
class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, pred, target):
        log_prob = F.log_softmax(pred, dim=-1)
        nll_loss = -log_prob.gather(dim=-1, index=target.unsqueeze(1))
        nll_loss = nll_loss.squeeze(1)
        smooth_loss = -log_prob.mean(dim=-1)
        loss = (1.0 - self.smoothing) * nll_loss + self.smoothing * smooth_loss
        return loss.mean()

# 训练函数（整合网页[4,6,8](@ref)优化策略）
def train(model, device, train_loader, optimizer, scheduler, scaler, epoch):
    model.train()
    criterion = LabelSmoothingCrossEntropy(smoothing=config["label_smoothing"])
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        
        # 应用MixUp增强[4](@ref)
        data, targets_a, targets_b, lam = mixup_data(data, target, config["mixup_alpha"])
        
        optimizer.zero_grad()
        with autocast():
            output = model(data)
            loss = lam * criterion(output, targets_a) + (1 - lam) * criterion(output, targets_b)
        
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # 梯度裁剪[6](@ref)
        scaler.step(optimizer)
        scaler.update()
        
    scheduler.step()

# 测试函数
def test(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    return 100.0 * correct / total

# 初始化优化器（网页[6](@ref)配置）
optimizer = optim.SGD(model.parameters(), 
                     lr=config["base_lr"],
                     momentum=config["momentum"],
                     weight_decay=config["weight_decay"],
                     nesterov=True)

# 学习率调度器（网页[6](@ref)余弦退火）
scheduler = CosineAnnealingLR(optimizer, T_max=config["epochs"])
scaler = GradScaler()  # 混合精度训练[4](@ref)

# 训练循环
best_acc = 0.0
for epoch in range(config["epochs"]):
    train(model, device, train_loader, optimizer, scheduler, scaler, epoch)
    acc = test(model, device, test_loader)
    
    if acc > best_acc:
        best_acc = acc
        torch.save(model.state_dict(), "best_model.pth")
    
    print(f"Epoch {epoch+1}/{config['epochs']} | Test Acc: {acc:.2f}%")

print(f"\nBest Test Accuracy: {best_acc:.2f}%")



Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\11834/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:18<00:00, 5.40MB/s]
  scaler = GradScaler()  # 混合精度训练[4](@ref)
  with autocast():


NameError: name 'F' is not defined