In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Select the compute device: CUDA when it is available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing pipelines.
# Training: random horizontal flip and random 32x32 crop (4 px padding) as
# augmentation, then tensor conversion and per-channel normalisation with
# mean 0.5 / std 0.5.
transform_train = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Test: no augmentation, only tensor conversion and the same normalisation.
transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Load the CIFAR10 dataset (downloaded into ./cifar10 when missing)
trainset = torchvision.datasets.CIFAR10(
    root='./cifar10',
    train=True,
    download=True,
    transform=transform_train,
)
trainloader = DataLoader(
    dataset=trainset,
    batch_size=512,
    shuffle=True,
    num_workers=2,
)

testset = torchvision.datasets.CIFAR10(
    root='./cifar10',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = DataLoader(
    dataset=testset,
    batch_size=200,
    shuffle=False,
    num_workers=2,
)

# Modified ResNet18 model
class ModifiedResNet18(nn.Module):
    """ResNet18 feature extractor adapted to 32x32 images, followed by an MLP head.

    The torchvision ResNet18 is altered in two ways before use:

    * ``conv1`` is replaced by a newly created 3x3, stride-1 convolution, so
      the first layer never carries pretrained weights;
    * ``maxpool`` is replaced by ``nn.Identity`` (no early down-sampling).

    All children of the torchvision model except the last one (its original
    classifier) form ``self.features``. The flattened features go through a
    512 -> 256 -> 128 MLP with ReLU and dropout, then a final linear layer
    that produces ``num_classes`` logits.

    Args:
        num_classes: Number of output logits.
        pretrained: When True (default, the previous behaviour) the backbone
            is created with ImageNet weights. Pass False to skip loading
            them, e.g. when the parameters are about to be overwritten by
            ``load_state_dict``.
    """

    def __init__(self, num_classes=10, pretrained=True):
        super(ModifiedResNet18, self).__init__()
        # Load ResNet18. torchvision >= 0.13 deprecates ``pretrained=`` in
        # favour of ``weights=``; use the new API when it exists and fall
        # back to the old keyword on earlier releases.
        weights_enum = getattr(torchvision.models, "ResNet18_Weights", None)
        if weights_enum is not None:
            # IMAGENET1K_V1 is what ``pretrained=True`` used to select.
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            self.resnet18 = torchvision.models.resnet18(weights=weights)
        else:
            self.resnet18 = torchvision.models.resnet18(pretrained=pretrained)

        # Replace the first convolution to suit CIFAR10's 32x32 images
        self.resnet18.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.resnet18.maxpool = nn.Identity()  # remove the max-pooling layer

        # Feature extractor: every child of the backbone except the last one.
        # NOTE(review): ``self.resnet18`` is kept as an attribute so that
        # state_dict keys stay compatible with existing checkpoints. As a
        # consequence the backbone modules are registered under both
        # ``resnet18.*`` and ``features.*``, and the backbone's last child is
        # still a registered submodule although ``forward`` never calls it.
        self.features = nn.Sequential(*list(self.resnet18.children())[:-1])

        # MLP head followed by the final classification layer
        self.mlp = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5)
        )
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the logits for a batch of images ``x``."""
        x = self.features(x)
        # Collapse everything but the batch dimension; the MLP expects 512
        # features per sample.
        x = torch.flatten(x, 1)
        x = self.mlp(x)
        x = self.fc(x)
        return x

# Build the network and place it on the selected device
model = ModifiedResNet18()
model = model.to(device)

# Loss function, optimizer and learning-rate schedule
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
)
# The learning rate is multiplied by 0.1 at scheduler steps 100 and 150
scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[100, 150],
    gamma=0.1,
)

# Train the model
def _run_epoch(model, criterion, loader, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    When ``optimizer`` is given, each batch is followed by a backward pass
    and an optimizer step; otherwise the pass runs with gradients disabled.
    Switching the model between ``train()`` and ``eval()`` is left to the
    caller.
    """
    training = optimizer is not None
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()
            batch_size = labels.size(0)
            # Weight the batch loss by its size so a smaller final batch does
            # not skew the epoch average. Assumes ``criterion`` returns the
            # mean over the batch (the default reduction).
            loss_sum += loss.item() * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()
    return loss_sum / total, 100. * correct / total


def train_model(model, criterion, optimizer, scheduler, num_epochs=200,
                train_loader=None, test_loader=None, save_path='best_model.pth'):
    """Train ``model``, evaluate it after every epoch and keep the best weights.

    For each epoch the function runs one training pass, prints the training
    loss and accuracy, runs one evaluation pass, prints the test loss and
    accuracy, saves ``model.state_dict()`` to ``save_path`` whenever the test
    accuracy exceeds the best value seen during this call, and finally steps
    ``scheduler``.

    Args:
        model: Network to train. Batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating the model parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of epochs to run.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
            Defaults to the notebook-level ``trainloader``.
        test_loader: Iterable of ``(inputs, labels)`` evaluation batches.
            Defaults to the notebook-level ``testloader``.
        save_path: Destination of the best checkpoint.

    Raises:
        ZeroDivisionError: If a loader yields no samples.

    Note:
        ``best_acc`` starts at 0.0 on every call, so calling this function
        again overwrites ``save_path`` as soon as an epoch reaches a non-zero
        test accuracy.
    """
    # Fall back to the notebook globals only when no loader is supplied.
    if train_loader is None:
        train_loader = trainloader
    if test_loader is None:
        test_loader = testloader

    # Send data wherever the model lives instead of relying on a global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    best_acc = 0.0
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss, train_acc = _run_epoch(model, criterion, train_loader, run_device, optimizer)
        print(f'Epoch: {epoch+1}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')

        # Evaluation phase
        model.eval()
        test_loss, test_acc = _run_epoch(model, criterion, test_loader, run_device)
        print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')

        # Save the best model
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save(model.state_dict(), save_path)
            print(f'Saved best model with accuracy {best_acc:.2f}%')

        # Adjust the learning rate
        scheduler.step()

    print('Finished Training')

# Start training with the default number of epochs
train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
)

Files already downloaded and verified
Files already downloaded and verified
Epoch: 1, Train Loss: 1.2329, Train Acc: 56.55%
Test Loss: 0.7524, Test Acc: 75.72%
Saved best model with accuracy 75.72%
Epoch: 2, Train Loss: 0.5925, Train Acc: 81.64%
Test Loss: 0.5636, Test Acc: 82.76%
Saved best model with accuracy 82.76%
Epoch: 3, Train Loss: 0.4694, Train Acc: 85.79%
Test Loss: 0.4999, Test Acc: 83.87%
Saved best model with accuracy 83.87%
Epoch: 4, Train Loss: 0.3988, Train Acc: 87.71%
Test Loss: 0.6229, Test Acc: 81.06%
Epoch: 5, Train Loss: 0.3608, Train Acc: 88.89%
Test Loss: 0.5610, Test Acc: 81.55%
Epoch: 6, Train Loss: 0.3252, Train Acc: 89.91%
Test Loss: 0.5619, Test Acc: 82.33%
Epoch: 7, Train Loss: 0.3007, Train Acc: 90.66%
Test Loss: 0.5202, Test Acc: 84.52%
Saved best model with accuracy 84.52%
Epoch: 8, Train Loss: 0.2936, Train Acc: 90.96%
Test Loss: 0.5120, Test Acc: 84.34%
Epoch: 9, Train Loss: 0.2696, Train Acc: 91.58%
Test Loss: 0.3844, Test Acc: 87.97%
Saved best model

KeyboardInterrupt: 

In [None]:
# Calls train_model again on the same model, optimizer and scheduler objects.
# NOTE(review): train_model starts from best_acc = 0.0 on every call, so this
# run rewrites best_model.pth as soon as an epoch reaches a non-zero test
# accuracy, even if an earlier run had saved a better checkpoint.
train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
)