In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as T
import torchvision.datasets as dset
from torch.utils.data import DataLoader, sampler

# 配置设备与超参数
USE_GPU = True
dtype = torch.float32
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# 增强型数据预处理 (整合网页6、8的数据增强策略)
transform_train = T.Compose([
    T.RandomHorizontalFlip(p=0.5),
    T.RandomRotation(15),
    T.RandomCrop(32, padding=4),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    T.ToTensor(),
    T.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

transform_test = T.Compose([
    T.ToTensor(),
    T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# 数据加载函数
def prepare_dataloaders():
    cifar10_train = dset.CIFAR10('./data', train=True, download=True, transform=transform_train)
    cifar10_test = dset.CIFAR10('./data', train=False, download=True, transform=transform_test)
    
    # 划分验证集
    NUM_TRAIN = 49000
    loader_train = DataLoader(cifar10_train, batch_size=128, 
                            sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))
    loader_val = DataLoader(cifar10_train, batch_size=128,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))
    loader_test = DataLoader(cifar10_test, batch_size=128)
    return loader_train, loader_val, loader_test

# 改进的ResNet架构 (整合网页1、4、5的残差块设计)
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return F.relu(out)

class ResNet(nn.Module):
    def __init__(self, num_blocks=[3,4,6,3], num_classes=10):
        super().__init__()
        self.in_channels = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer1 = self._make_layer(64, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(64, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(128, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(256, 512, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, in_channels, out_channels, num_blocks, stride):
        layers = [ResidualBlock(in_channels, out_channels, stride)]
        for _ in range(1, num_blocks):
            layers.append(ResidualBlock(out_channels, out_channels, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

# 训练与验证函数 (整合网页2、3的最佳实践)
def train_model(model, optimizer, scheduler, loader_train, loader_val, epochs=50):
    model = model.to(device)
    best_acc = 0.0
    
    for epoch in range(epochs):
        # 训练阶段
        model.train()
        for x, y in loader_train:
            x, y = x.to(device), y.to(device)
            
            optimizer.zero_grad()
            scores = model(x)
            loss = F.cross_entropy(scores, y)
            loss.backward()
            optimizer.step()
        
        # 验证阶段
        model.eval()
        val_acc = check_accuracy(loader_val, model)
        scheduler.step()
        
        # 保存最佳模型
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pth')
            
        print(f"Epoch {epoch+1}/{epochs} | Val Acc: {val_acc:.2f}%")
    
    # 加载最佳模型
    model.load_state_dict(torch.load('best_model.pth'))
    return model

def check_accuracy(loader, model):
    model.eval()
    num_correct = 0
    num_samples = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
    acc = float(num_correct) / num_samples
    return acc * 100

# 主程序
if __name__ == "__main__":
    # 准备数据
    loader_train, loader_val, loader_test = prepare_dataloaders()
    
    # 初始化模型
    model = ResNet(num_blocks=[3,4,6,3])
    
    # 优化器配置 (参考网页7、8的协同优化策略)
    optimizer = optim.AdamW(
        model.parameters(), 
        lr=3e-4,
        weight_decay=5e-4
    )
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, 
        T_max=200
    )
    
    # 训练模型
    trained_model = train_model(
        model, 
        optimizer,
        scheduler,
        loader_train,
        loader_val,
        epochs=50
    )
    
    # 最终测试
    test_acc = check_accuracy(loader_test, trained_model)
    print(f"\nFinal Test Accuracy: {test_acc:.2f}%")

Using device: cuda


100%|██████████| 170M/170M [00:29<00:00, 5.83MB/s] 


Epoch 1/50 | Val Acc: 49.50%
Epoch 2/50 | Val Acc: 65.30%
Epoch 3/50 | Val Acc: 68.40%
Epoch 4/50 | Val Acc: 75.00%
Epoch 5/50 | Val Acc: 76.60%
Epoch 6/50 | Val Acc: 74.90%
Epoch 7/50 | Val Acc: 77.70%
Epoch 8/50 | Val Acc: 81.80%
Epoch 9/50 | Val Acc: 78.30%
Epoch 10/50 | Val Acc: 83.10%
Epoch 11/50 | Val Acc: 83.70%
Epoch 12/50 | Val Acc: 82.50%
Epoch 13/50 | Val Acc: 83.10%
Epoch 14/50 | Val Acc: 83.50%
Epoch 15/50 | Val Acc: 83.90%
Epoch 16/50 | Val Acc: 83.80%
Epoch 17/50 | Val Acc: 86.60%
Epoch 18/50 | Val Acc: 85.60%
Epoch 19/50 | Val Acc: 83.90%
Epoch 20/50 | Val Acc: 85.50%
Epoch 21/50 | Val Acc: 87.50%
Epoch 22/50 | Val Acc: 88.80%
Epoch 23/50 | Val Acc: 86.50%
Epoch 24/50 | Val Acc: 85.60%
Epoch 25/50 | Val Acc: 87.80%
Epoch 26/50 | Val Acc: 89.90%
Epoch 27/50 | Val Acc: 88.50%
Epoch 28/50 | Val Acc: 88.30%
Epoch 29/50 | Val Acc: 87.20%
Epoch 30/50 | Val Acc: 89.60%
Epoch 31/50 | Val Acc: 88.80%
Epoch 32/50 | Val Acc: 89.20%
Epoch 33/50 | Val Acc: 89.50%
Epoch 34/50 | Val A